From db88ff7365826033a11a2c9a7f2d1925c31e77d3 Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 8 Jun 2020 16:08:06 -0500 Subject: [PATCH 01/59] unit-test: Added simple unit test framework --- .gitignore | 9 +- .style.yapf | 2 + unit_test/golden/ARM_A9_2GHz.golden | 306 +++++++++++++ unit_test/golden/ARM_A9_2GHz_withIOC.golden | 391 +++++++++++++++++ unit_test/golden/Alpha21364.golden | 402 +++++++++++++++++ unit_test/golden/Penryn.golden | 355 +++++++++++++++ unit_test/golden/T1.golden | 333 ++++++++++++++ unit_test/golden/T1_DC_64.golden | 299 +++++++++++++ unit_test/golden/T1_SBT_64.golden | 281 ++++++++++++ unit_test/golden/T1_ST_64.golden | 299 +++++++++++++ unit_test/golden/T2.golden | 361 ++++++++++++++++ unit_test/golden/Xeon.golden | 387 +++++++++++++++++ unit_test/input/ARM_A9_2GHz.xml | 442 +++++++++++++++++++ unit_test/input/ARM_A9_2GHz_withIOC.xml | 442 +++++++++++++++++++ unit_test/input/Alpha21364.xml | 436 +++++++++++++++++++ unit_test/input/Niagara1.xml | 429 ++++++++++++++++++ unit_test/input/Niagara1_sharing_DC.xml | 440 +++++++++++++++++++ unit_test/input/Niagara1_sharing_SBT.xml | 441 +++++++++++++++++++ unit_test/input/Niagara1_sharing_ST.xml | 432 +++++++++++++++++++ unit_test/input/Niagara2.xml | 427 ++++++++++++++++++ unit_test/input/Penryn.xml | 446 +++++++++++++++++++ unit_test/input/Xeon.xml | 454 ++++++++++++++++++++ unit_test/unit_test.py | 150 +++++++ unit_test/unit_test.sh | 107 +++++ 24 files changed, 8070 insertions(+), 1 deletion(-) create mode 100644 .style.yapf create mode 100644 unit_test/golden/ARM_A9_2GHz.golden create mode 100644 unit_test/golden/ARM_A9_2GHz_withIOC.golden create mode 100644 unit_test/golden/Alpha21364.golden create mode 100644 unit_test/golden/Penryn.golden create mode 100644 unit_test/golden/T1.golden create mode 100644 unit_test/golden/T1_DC_64.golden create mode 100644 unit_test/golden/T1_SBT_64.golden create mode 100644 unit_test/golden/T1_ST_64.golden create mode 100644 unit_test/golden/T2.golden create 
mode 100644 unit_test/golden/Xeon.golden create mode 100644 unit_test/input/ARM_A9_2GHz.xml create mode 100644 unit_test/input/ARM_A9_2GHz_withIOC.xml create mode 100644 unit_test/input/Alpha21364.xml create mode 100644 unit_test/input/Niagara1.xml create mode 100644 unit_test/input/Niagara1_sharing_DC.xml create mode 100644 unit_test/input/Niagara1_sharing_SBT.xml create mode 100644 unit_test/input/Niagara1_sharing_ST.xml create mode 100644 unit_test/input/Niagara2.xml create mode 100644 unit_test/input/Penryn.xml create mode 100644 unit_test/input/Xeon.xml create mode 100755 unit_test/unit_test.py create mode 100755 unit_test/unit_test.sh diff --git a/.gitignore b/.gitignore index 2f66b66..f19f276 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,11 @@ # McPAT custom mcpat -obj_opt/ \ No newline at end of file +obj_opt/ + +# vim +.*.swo +.*.swp + +# Unit-Test +unit_test/output diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 0000000..fdd0723 --- /dev/null +++ b/.style.yapf @@ -0,0 +1,2 @@ +[style] +based_on_style = yapf diff --git a/unit_test/golden/ARM_A9_2GHz.golden b/unit_test/golden/ARM_A9_2GHz.golden new file mode 100644 index 0000000..8ccc92a --- /dev/null +++ b/unit_test/golden/ARM_A9_2GHz.golden @@ -0,0 +1,306 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 40 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= conservative interconnect technology projection + Core clock Rate(MHz) 2000 + +***************************************************************************************** +Processor: + Area = 5.39698 mm^2 + Peak Power = 1.74189 W + Total Leakage = 0.108687 W + Peak Dynamic = 1.6332 W + Subthreshold Leakage = 0.0523094 W + Gate Leakage = 0.0563774 W + Runtime Dynamic = 2.96053 W + + Total Cores: 2 cores + Device Type= ITRS low operating power device type + Area = 4.84735 mm^2 + Peak Dynamic = 1.57159 W + Subthreshold Leakage = 0.0484486 W + Gate Leakage = 0.0501375 W + Runtime Dynamic = 1.06575 W + + Total First Level Directory: + Device Type= ITRS low operating power device type + Area = 0.535391 mm^2 + Peak Dynamic = 0.045212 W + Subthreshold Leakage = 0.00370577 W + Gate Leakage = 0.0060234 W + Runtime Dynamic = 1.81276 W + + Total NoCs (Network/Bus): + Device Type= ITRS low operating power device type + Area = 0.014239 mm^2 + Peak Dynamic = 0.0164048 W + Subthreshold Leakage = 0.000155022 W + Gate Leakage = 0.000216526 W + Runtime Dynamic = 0.0820239 W + +***************************************************************************************** +Core: + Area = 2.42367 mm^2 + Peak Dynamic = 0.785793 W + Subthreshold Leakage = 0.0242243 W + Gate Leakage = 0.0250688 W + Runtime Dynamic = 1.06575 W + + Instruction Fetch Unit: + Area = 0.524458 mm^2 + Peak Dynamic = 0.336176 W + Subthreshold Leakage = 0.0040912 W + Gate Leakage = 0.00274435 W + Runtime Dynamic = 0.328993 W + + Instruction Cache: + Area = 0.320478 mm^2 + Peak Dynamic = 0.0593282 W + Subthreshold Leakage = 0.0027081 W + Gate Leakage = 0.00194265 W + Runtime Dynamic = 0.0375516 W + + Branch Target Buffer: + Area = 0.123663 mm^2 + Peak Dynamic = 
0.0112507 W + Subthreshold Leakage = 0.000676313 W + Gate Leakage = 0.000233814 W + Runtime Dynamic = 0.0450027 W + + Branch Predictor: + Area = 0.0651588 mm^2 + Peak Dynamic = 0.00901157 W + Subthreshold Leakage = 0.000528583 W + Gate Leakage = 0.000363456 W + Runtime Dynamic = 0.012543 W + + Global Predictor: + Area = 0.0314284 mm^2 + Peak Dynamic = 0.0040607 W + Subthreshold Leakage = 0.000255788 W + Gate Leakage = 0.000170482 W + Runtime Dynamic = 0.00593998 W + + Local Predictor: + L1_Local Predictor: + Area = 0.000689664 mm^2 + Peak Dynamic = 0.000229924 W + Subthreshold Leakage = 5.67112e-06 W + Gate Leakage = 8.73898e-06 W + Runtime Dynamic = 0.00037915 W + + L2_Local Predictor: + Area = 0.000786361 mm^2 + Peak Dynamic = 0.000176096 W + Subthreshold Leakage = 6.4167e-06 W + Gate Leakage = 9.51543e-06 W + Runtime Dynamic = 0.000283779 W + + Chooser: + Area = 0.0314284 mm^2 + Peak Dynamic = 0.0040607 W + Subthreshold Leakage = 0.000255788 W + Gate Leakage = 0.000170482 W + Runtime Dynamic = 0.00593998 W + + RAS: + Area = 0.000825971 mm^2 + Peak Dynamic = 0.00048414 W + Subthreshold Leakage = 4.91968e-06 W + Gate Leakage = 4.2374e-06 W + Runtime Dynamic = 7.87561e-08 W + + Instruction Buffer: + Area = 0.00882485 mm^2 + Peak Dynamic = 0.195493 W + Subthreshold Leakage = 6.22343e-05 W + Gate Leakage = 4.96656e-05 W + Runtime Dynamic = 0.11171 W + + Instruction Decoder: + Area = 0.00468731 mm^2 + Peak Dynamic = 0.05881 W + Subthreshold Leakage = 9.40317e-05 W + Gate Leakage = 8.38587e-05 W + Runtime Dynamic = 0.11762 W + + Renaming Unit: + Area = 0.0160659 mm^2 + Peak Dynamic = 0.0239515 W + Subthreshold Leakage = 5.90215e-05 W + Gate Leakage = 0.000116732 W + Runtime Dynamic = 0.0428516 W + + Int Front End RAT with 1 internal checkpoints: + Area = 0.00390336 mm^2 + Peak Dynamic = 0.011046 W + Subthreshold Leakage = 9.95256e-06 W + Gate Leakage = 1.09741e-05 W + Runtime Dynamic = 0.0209685 W + + FP Front End RAT with 1 internal checkpoints: + Area = 0.00203071 
mm^2 + Peak Dynamic = 0.00488971 W + Subthreshold Leakage = 8.31122e-06 W + Gate Leakage = 1.20567e-05 W + Runtime Dynamic = 0.00458901 W + + Free List: + Area = 0.00474032 mm^2 + Peak Dynamic = 0.00315747 W + Subthreshold Leakage = 9.5856e-06 W + Gate Leakage = 9.87904e-06 W + Runtime Dynamic = 0.0102529 W + + FP Free List: + Area = 0.00374541 mm^2 + Peak Dynamic = 0.00232188 W + Subthreshold Leakage = 9.23741e-06 W + Gate Leakage = 1.29204e-05 W + Runtime Dynamic = 0.00196849 W + + Load Store Unit: + Area = 0.373209 mm^2 + Peak Dynamic = 0.0755208 W + Subthreshold Leakage = 0.00291231 W + Gate Leakage = 0.00223072 W + Runtime Dynamic = 0.178854 W + + Data Cache: + Area = 0.328266 mm^2 + Peak Dynamic = 0.0629305 W + Subthreshold Leakage = 0.00280345 W + Gate Leakage = 0.00205765 W + Runtime Dynamic = 0.160056 W + + StoreQ: + Area = 0.00951832 mm^2 + Peak Dynamic = 0.0107895 W + Subthreshold Leakage = 8.69175e-05 W + Gate Leakage = 0.000102166 W + Runtime Dynamic = 0.0151964 W + + Memory Management Unit: + Area = 0.0235719 mm^2 + Peak Dynamic = 0.0134081 W + Subthreshold Leakage = 0.00020835 W + Runtime Dynamic = 0.0462239 W + + Itlb: + Area = 0.0109629 mm^2 + Peak Dynamic = 0.00628297 W + Subthreshold Leakage = 9.32078e-05 W + Gate Leakage = 0.00010099 W + Runtime Dynamic = 0.0139623 W + + Dtlb: + Area = 0.0109629 mm^2 + Peak Dynamic = 0.00495657 W + Subthreshold Leakage = 9.32078e-05 W + Gate Leakage = 0.00010099 W + Runtime Dynamic = 0.0279245 W + + Execution Unit: + Area = 1.4265 mm^2 + Peak Dynamic = 0.336736 W + Subthreshold Leakage = 0.0159605 W + Runtime Dynamic = 0.468828 W + + Register Files: + Area = 0.172904 mm^2 + Peak Dynamic = 0.110354 W + Subthreshold Leakage = 0.00021843 W + Gate Leakage = 0.000203433 W + Runtime Dynamic = 0.0518787 W + + Integer RF: + Area = 0.123956 mm^2 + Peak Dynamic = 0.107469 W + Subthreshold Leakage = 0.000140392 W + Gate Leakage = 0.00013123 W + Runtime Dynamic = 0.0467274 W + + Floating Point RF: + Area = 0.0489472 mm^2 + 
Peak Dynamic = 0.00288482 W + Subthreshold Leakage = 7.80381e-05 W + Gate Leakage = 7.22038e-05 W + Runtime Dynamic = 0.00515143 W + + Instruction Scheduler: + Area = 0.0675611 mm^2 + Peak Dynamic = 0.0813923 W + Subthreshold Leakage = 0.000343788 W + Gate Leakage = 0.000423972 W + Runtime Dynamic = 0.122921 W + + Instruction Window: + Area = 0.0621798 mm^2 + Peak Dynamic = 0.0677695 W + Subthreshold Leakage = 0.000290616 W + Gate Leakage = 0.000350863 W + Runtime Dynamic = 0.0891981 W + + FP Instruction Window: + Area = 0.00538133 mm^2 + Peak Dynamic = 0.0136229 W + Subthreshold Leakage = 5.31726e-05 W + Gate Leakage = 7.3109e-05 W + Runtime Dynamic = 0.0337229 W + + Integer ALUs (Count: 3 ): + Area = 0.104135 mm^2 + Peak Dynamic = 0.0283684 W + Subthreshold Leakage = 0.00345415 W + Gate Leakage = 0.0040105 W + Runtime Dynamic = 0.0373268 W + + Floating Point Units (FPUs) (Count: 1 ): + Area = 0.971259 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00805417 W + Gate Leakage = 0.00935142 W + Runtime Dynamic = 0.0373268 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.104135 mm^2 + Peak Dynamic = 0.0204053 W + Subthreshold Leakage = 0.00345415 W + Gate Leakage = 0.0040105 W + Runtime Dynamic = 0.049769 W + + Results Broadcast Bus: + Area Overhead = 0.00486214 mm^2 + Peak Dynamic = 0.0942884 W + Subthreshold Leakage = 0.000413867 W + Gate Leakage = 0.000480526 W + Runtime Dynamic = 0.16575 W + +***************************************************************************************** +First Level Directory + Area = 0.267696 mm^2 + Peak Dynamic = 0.022606 W + Subthreshold Leakage = 0.00185288 W + Gate Leakage = 0.0030117 W + Runtime Dynamic = 1.81276 W + +***************************************************************************************** +BUSES + Area = 0.014239 mm^2 + Peak Dynamic = 0.0164048 W + Subthreshold Leakage = 0.000155022 W + Gate Leakage = 0.000216526 W + Runtime Dynamic = 0.0820239 W + + Bus: + Area = 0.014239 mm^2 + Peak Dynamic = 0.0164048 
W + Subthreshold Leakage = 0.000155022 W + Gate Leakage = 0.000216526 W + Runtime Dynamic = 0.0820239 W + +***************************************************************************************** diff --git a/unit_test/golden/ARM_A9_2GHz_withIOC.golden b/unit_test/golden/ARM_A9_2GHz_withIOC.golden new file mode 100644 index 0000000..ec0908f --- /dev/null +++ b/unit_test/golden/ARM_A9_2GHz_withIOC.golden @@ -0,0 +1,391 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 40 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= conservative interconnect technology projection + Core clock Rate(MHz) 2000 + +***************************************************************************************** +Processor: + Area = 6.59091 mm^2 + Peak Power = 2.49531 W + Total Leakage = 0.125177 W + Peak Dynamic = 2.37014 W + Subthreshold Leakage = 0.0559042 W + Gate Leakage = 0.0692724 W + Runtime Dynamic = 5.57062 W + + Total Cores: 2 cores + Device Type= ITRS low operating power device type + Area = 4.84735 mm^2 + Peak Dynamic = 1.57159 W + Subthreshold Leakage = 0.0484486 W + Gate Leakage = 0.0501375 W + Runtime Dynamic = 1.06575 W + + Total First Level Directory: + Device Type= ITRS low operating power device type + Area = 0.535391 mm^2 + Peak Dynamic = 0.045212 W + Subthreshold Leakage = 0.00370577 W + Gate Leakage = 0.0060234 W + Runtime Dynamic = 1.81276 W + + Total NoCs (Network/Bus): + Device Type= ITRS low operating power device type + Area = 0.0157374 mm^2 + Peak Dynamic = 0.018131 W + Subthreshold Leakage = 0.000171335 W + Gate Leakage = 0.00023931 W + Runtime Dynamic = 0.0906552 W + + Total MCs: 1 Memory Controllers + Device Type= ITRS low operating power device type + Area = 0.529714 mm^2 + Peak Dynamic = 0.324983 W + Subthreshold Leakage = 
0.00165926 W + Gate Leakage = 0.00561872 W + Runtime Dynamic = 2.31429 W + + Total Flash/SSD Controllers: 1 Flash/SSD Controllers + Device Type= ITRS low operating power device type + Area = 0.109065 mm^2 + Peak Dynamic = 0.0299827 W + Subthreshold Leakage = 0.000384542 W + Gate Leakage = 0.00145326 W + Runtime Dynamic = 0.0209879 W + + Total NIUs: 1 Network Interface Units + Device Type= ITRS low operating power device type + Area = 0.261302 mm^2 + Peak Dynamic = 0.164859 W + Subthreshold Leakage = 0.000537676 W + Gate Leakage = 0.00203199 W + Runtime Dynamic = 0.115402 W + + Total PCIes: 1 PCIe Controllers + Device Type= ITRS low operating power device type + Area = 0.292355 mm^2 + Peak Dynamic = 0.215383 W + Subthreshold Leakage = 0.000997081 W + Gate Leakage = 0.00376817 W + Runtime Dynamic = 0.150768 W + +***************************************************************************************** +Core: + Area = 2.42367 mm^2 + Peak Dynamic = 0.785793 W + Subthreshold Leakage = 0.0242243 W + Gate Leakage = 0.0250688 W + Runtime Dynamic = 1.06575 W + + Instruction Fetch Unit: + Area = 0.524458 mm^2 + Peak Dynamic = 0.336176 W + Subthreshold Leakage = 0.0040912 W + Gate Leakage = 0.00274435 W + Runtime Dynamic = 0.328993 W + + Instruction Cache: + Area = 0.320478 mm^2 + Peak Dynamic = 0.0593282 W + Subthreshold Leakage = 0.0027081 W + Gate Leakage = 0.00194265 W + Runtime Dynamic = 0.0375516 W + + Branch Target Buffer: + Area = 0.123663 mm^2 + Peak Dynamic = 0.0112507 W + Subthreshold Leakage = 0.000676313 W + Gate Leakage = 0.000233814 W + Runtime Dynamic = 0.0450027 W + + Branch Predictor: + Area = 0.0651588 mm^2 + Peak Dynamic = 0.00901157 W + Subthreshold Leakage = 0.000528583 W + Gate Leakage = 0.000363456 W + Runtime Dynamic = 0.012543 W + + Global Predictor: + Area = 0.0314284 mm^2 + Peak Dynamic = 0.0040607 W + Subthreshold Leakage = 0.000255788 W + Gate Leakage = 0.000170482 W + Runtime Dynamic = 0.00593998 W + + Local Predictor: + L1_Local Predictor: + 
Area = 0.000689664 mm^2 + Peak Dynamic = 0.000229924 W + Subthreshold Leakage = 5.67112e-06 W + Gate Leakage = 8.73898e-06 W + Runtime Dynamic = 0.00037915 W + + L2_Local Predictor: + Area = 0.000786361 mm^2 + Peak Dynamic = 0.000176096 W + Subthreshold Leakage = 6.4167e-06 W + Gate Leakage = 9.51543e-06 W + Runtime Dynamic = 0.000283779 W + + Chooser: + Area = 0.0314284 mm^2 + Peak Dynamic = 0.0040607 W + Subthreshold Leakage = 0.000255788 W + Gate Leakage = 0.000170482 W + Runtime Dynamic = 0.00593998 W + + RAS: + Area = 0.000825971 mm^2 + Peak Dynamic = 0.00048414 W + Subthreshold Leakage = 4.91968e-06 W + Gate Leakage = 4.2374e-06 W + Runtime Dynamic = 7.87561e-08 W + + Instruction Buffer: + Area = 0.00882485 mm^2 + Peak Dynamic = 0.195493 W + Subthreshold Leakage = 6.22343e-05 W + Gate Leakage = 4.96656e-05 W + Runtime Dynamic = 0.11171 W + + Instruction Decoder: + Area = 0.00468731 mm^2 + Peak Dynamic = 0.05881 W + Subthreshold Leakage = 9.40317e-05 W + Gate Leakage = 8.38587e-05 W + Runtime Dynamic = 0.11762 W + + Renaming Unit: + Area = 0.0160659 mm^2 + Peak Dynamic = 0.0239515 W + Subthreshold Leakage = 5.90215e-05 W + Gate Leakage = 0.000116732 W + Runtime Dynamic = 0.0428516 W + + Int Front End RAT with 1 internal checkpoints: + Area = 0.00390336 mm^2 + Peak Dynamic = 0.011046 W + Subthreshold Leakage = 9.95256e-06 W + Gate Leakage = 1.09741e-05 W + Runtime Dynamic = 0.0209685 W + + FP Front End RAT with 1 internal checkpoints: + Area = 0.00203071 mm^2 + Peak Dynamic = 0.00488971 W + Subthreshold Leakage = 8.31122e-06 W + Gate Leakage = 1.20567e-05 W + Runtime Dynamic = 0.00458901 W + + Free List: + Area = 0.00474032 mm^2 + Peak Dynamic = 0.00315747 W + Subthreshold Leakage = 9.5856e-06 W + Gate Leakage = 9.87904e-06 W + Runtime Dynamic = 0.0102529 W + + FP Free List: + Area = 0.00374541 mm^2 + Peak Dynamic = 0.00232188 W + Subthreshold Leakage = 9.23741e-06 W + Gate Leakage = 1.29204e-05 W + Runtime Dynamic = 0.00196849 W + + Load Store Unit: + Area = 
0.373209 mm^2 + Peak Dynamic = 0.0755208 W + Subthreshold Leakage = 0.00291231 W + Gate Leakage = 0.00223072 W + Runtime Dynamic = 0.178854 W + + Data Cache: + Area = 0.328266 mm^2 + Peak Dynamic = 0.0629305 W + Subthreshold Leakage = 0.00280345 W + Gate Leakage = 0.00205765 W + Runtime Dynamic = 0.160056 W + + StoreQ: + Area = 0.00951832 mm^2 + Peak Dynamic = 0.0107895 W + Subthreshold Leakage = 8.69175e-05 W + Gate Leakage = 0.000102166 W + Runtime Dynamic = 0.0151964 W + + Memory Management Unit: + Area = 0.0235719 mm^2 + Peak Dynamic = 0.0134081 W + Subthreshold Leakage = 0.00020835 W + Runtime Dynamic = 0.0462239 W + + Itlb: + Area = 0.0109629 mm^2 + Peak Dynamic = 0.00628297 W + Subthreshold Leakage = 9.32078e-05 W + Gate Leakage = 0.00010099 W + Runtime Dynamic = 0.0139623 W + + Dtlb: + Area = 0.0109629 mm^2 + Peak Dynamic = 0.00495657 W + Subthreshold Leakage = 9.32078e-05 W + Gate Leakage = 0.00010099 W + Runtime Dynamic = 0.0279245 W + + Execution Unit: + Area = 1.4265 mm^2 + Peak Dynamic = 0.336736 W + Subthreshold Leakage = 0.0159605 W + Runtime Dynamic = 0.468828 W + + Register Files: + Area = 0.172904 mm^2 + Peak Dynamic = 0.110354 W + Subthreshold Leakage = 0.00021843 W + Gate Leakage = 0.000203433 W + Runtime Dynamic = 0.0518787 W + + Integer RF: + Area = 0.123956 mm^2 + Peak Dynamic = 0.107469 W + Subthreshold Leakage = 0.000140392 W + Gate Leakage = 0.00013123 W + Runtime Dynamic = 0.0467274 W + + Floating Point RF: + Area = 0.0489472 mm^2 + Peak Dynamic = 0.00288482 W + Subthreshold Leakage = 7.80381e-05 W + Gate Leakage = 7.22038e-05 W + Runtime Dynamic = 0.00515143 W + + Instruction Scheduler: + Area = 0.0675611 mm^2 + Peak Dynamic = 0.0813923 W + Subthreshold Leakage = 0.000343788 W + Gate Leakage = 0.000423972 W + Runtime Dynamic = 0.122921 W + + Instruction Window: + Area = 0.0621798 mm^2 + Peak Dynamic = 0.0677695 W + Subthreshold Leakage = 0.000290616 W + Gate Leakage = 0.000350863 W + Runtime Dynamic = 0.0891981 W + + FP Instruction 
Window: + Area = 0.00538133 mm^2 + Peak Dynamic = 0.0136229 W + Subthreshold Leakage = 5.31726e-05 W + Gate Leakage = 7.3109e-05 W + Runtime Dynamic = 0.0337229 W + + Integer ALUs (Count: 3 ): + Area = 0.104135 mm^2 + Peak Dynamic = 0.0283684 W + Subthreshold Leakage = 0.00345415 W + Gate Leakage = 0.0040105 W + Runtime Dynamic = 0.0373268 W + + Floating Point Units (FPUs) (Count: 1 ): + Area = 0.971259 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00805417 W + Gate Leakage = 0.00935142 W + Runtime Dynamic = 0.0373268 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.104135 mm^2 + Peak Dynamic = 0.0204053 W + Subthreshold Leakage = 0.00345415 W + Gate Leakage = 0.0040105 W + Runtime Dynamic = 0.049769 W + + Results Broadcast Bus: + Area Overhead = 0.00486214 mm^2 + Peak Dynamic = 0.0942884 W + Subthreshold Leakage = 0.000413867 W + Gate Leakage = 0.000480526 W + Runtime Dynamic = 0.16575 W + +***************************************************************************************** +First Level Directory + Area = 0.267696 mm^2 + Peak Dynamic = 0.022606 W + Subthreshold Leakage = 0.00185288 W + Gate Leakage = 0.0030117 W + Runtime Dynamic = 1.81276 W + +***************************************************************************************** +Memory Controller: + Area = 0.529714 mm^2 + Peak Dynamic = 0.324983 W + Subthreshold Leakage = 0.00165926 W + Gate Leakage = 0.00561872 W + Runtime Dynamic = 2.31429 W + + Front End Engine: + Area = 0.0869779 mm^2 + Peak Dynamic = 0.0264175 W + Subthreshold Leakage = 0.000257111 W + Gate Leakage = 0.000319726 W + Runtime Dynamic = 0.178757 W + + Transaction Engine: + Area = 0.113609 mm^2 + Peak Dynamic = 0.160252 W + Subthreshold Leakage = 0.000280429 W + Gate Leakage = 0.0010598 W + Runtime Dynamic = 1.08436 W + + PHY: + Area = 0.329127 mm^2 + Peak Dynamic = 0.138314 W + Subthreshold Leakage = 0.00112172 W + Gate Leakage = 0.0042392 W + Runtime Dynamic = 1.05117 W + 
+***************************************************************************************** +Flash Controller: + Area = 0.109065 mm^2 + Peak Dynamic = 0.0299827 W + Subthreshold Leakage = 0.000384542 W + Gate Leakage = 0.00145326 W + Runtime Dynamic = 0.0209879 W + +***************************************************************************************** +NIU: + Area = 0.261302 mm^2 + Peak Dynamic = 0.164859 W + Subthreshold Leakage = 0.000537676 W + Gate Leakage = 0.00203199 W + Runtime Dynamic = 0.115402 W + +***************************************************************************************** +PCIe: + Area = 0.292355 mm^2 + Peak Dynamic = 0.215383 W + Subthreshold Leakage = 0.000997081 W + Gate Leakage = 0.00376817 W + Runtime Dynamic = 0.150768 W + +***************************************************************************************** +BUSES + Area = 0.0157374 mm^2 + Peak Dynamic = 0.018131 W + Subthreshold Leakage = 0.000171335 W + Gate Leakage = 0.00023931 W + Runtime Dynamic = 0.0906552 W + + Bus: + Area = 0.0157374 mm^2 + Peak Dynamic = 0.018131 W + Subthreshold Leakage = 0.000171335 W + Gate Leakage = 0.00023931 W + Runtime Dynamic = 0.0906552 W + +***************************************************************************************** diff --git a/unit_test/golden/Alpha21364.golden b/unit_test/golden/Alpha21364.golden new file mode 100644 index 0000000..566fee7 --- /dev/null +++ b/unit_test/golden/Alpha21364.golden @@ -0,0 +1,402 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + +Warning: icache array structure cannot satisfy throughput constraint. +Warning: icache array structure cannot satisfy latency constraint. +Warning: Branch Target Buffer array structure cannot satisfy throughput constraint. +Warning: Branch Target Buffer array structure cannot satisfy latency constraint. +Warning: Global Predictor array structure cannot satisfy throughput constraint. 
+Warning: Global Predictor array structure cannot satisfy latency constraint. +Warning: Predictor Chooser array structure cannot satisfy throughput constraint. +Warning: Predictor Chooser array structure cannot satisfy latency constraint. +Warning: dcache array structure cannot satisfy throughput constraint. +Warning: dcache array structure cannot satisfy latency constraint. + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 180 nm + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 1200 + +***************************************************************************************** +Processor: + Area = 311.691 mm^2 + Peak Power = 86.0307 W + Total Leakage = 0.143326 W + Peak Dynamic = 85.8874 W + Subthreshold Leakage = 0.139369 W + Gate Leakage = 0.00395711 W + Runtime Dynamic = 83.6833 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 127.712 mm^2 + Peak Dynamic = 51.6216 W + Subthreshold Leakage = 0.0554561 W + Gate Leakage = 0.00345586 W + Runtime Dynamic = 72.3661 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 135.613 mm^2 + Peak Dynamic = 4.76002 W + Subthreshold Leakage = 0.0784831 W + Gate Leakage = 0.000175445 W + Runtime Dynamic = 6.35901 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 1.67179 mm^2 + Peak Dynamic = 0.826596 W + Subthreshold Leakage = 0.000382801 W + Gate Leakage = 3.06958e-05 W + Runtime Dynamic = 0.572134 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 28.9377 mm^2 + Peak Dynamic = 14.9356 W + Subthreshold Leakage = 0.00280385 W + Gate Leakage = 0.000158855 W + Runtime Dynamic = 2.31478 W + + Total MCs: 2 Memory Controllers + Device Type= ITRS high performance device type + Area = 17.7563 mm^2 + Peak Dynamic = 13.7436 W + 
Subthreshold Leakage = 0.00224319 W + Gate Leakage = 0.000136259 W + Runtime Dynamic = 2.07121 W + +***************************************************************************************** +Core: + Area = 127.712 mm^2 + Peak Dynamic = 51.6216 W + Subthreshold Leakage = 0.0554561 W + Gate Leakage = 0.00345586 W + Runtime Dynamic = 72.3661 W + + Instruction Fetch Unit: + Area = 27.2036 mm^2 + Peak Dynamic = 8.67944 W + Subthreshold Leakage = 0.00635173 W + Gate Leakage = 0.000348289 W + Runtime Dynamic = 10.3092 W + + Instruction Cache: + Area = 11.4744 mm^2 + Peak Dynamic = 1.29115 W + Subthreshold Leakage = 0.00391764 W + Gate Leakage = 0.000183553 W + Runtime Dynamic = 1.89991 W + + Branch Target Buffer: + Area = 13.1967 mm^2 + Peak Dynamic = 0.548968 W + Subthreshold Leakage = 0.00150571 W + Gate Leakage = 9.06003e-05 W + Runtime Dynamic = 2.19587 W + + Branch Predictor: + Area = 1.75712 mm^2 + Peak Dynamic = 0.239897 W + Subthreshold Leakage = 0.000500353 W + Gate Leakage = 1.95603e-05 W + Runtime Dynamic = 0.229419 W + + Global Predictor: + Area = 0.607456 mm^2 + Peak Dynamic = 0.0825791 W + Subthreshold Leakage = 0.000184661 W + Gate Leakage = 7.36458e-06 W + Runtime Dynamic = 0.0887205 W + + Local Predictor: + L1_Local Predictor: + Area = 0.282223 mm^2 + Peak Dynamic = 0.0303388 W + Subthreshold Leakage = 7.96913e-05 W + Gate Leakage = 2.92127e-06 W + Runtime Dynamic = 0.0344156 W + + L2_Local Predictor: + Area = 0.177264 mm^2 + Peak Dynamic = 0.0154884 W + Subthreshold Leakage = 4.01694e-05 W + Gate Leakage = 1.47445e-06 W + Runtime Dynamic = 0.0175587 W + + Chooser: + Area = 0.607456 mm^2 + Peak Dynamic = 0.0825791 W + Subthreshold Leakage = 0.000184661 W + Gate Leakage = 7.36458e-06 W + Runtime Dynamic = 0.0887205 W + + RAS: + Area = 0.0827205 mm^2 + Peak Dynamic = 0.0289113 W + Subthreshold Leakage = 1.11699e-05 W + Gate Leakage = 4.35453e-07 W + Runtime Dynamic = 3.56984e-06 W + + Instruction Buffer: + Area = 0.198955 mm^2 + Peak Dynamic = 1.8463 W + 
Subthreshold Leakage = 3.21125e-05 W + Gate Leakage = 1.24349e-06 W + Runtime Dynamic = 1.23087 W + + Instruction Decoder: + Area = 0.146031 mm^2 + Peak Dynamic = 4.07384 W + Subthreshold Leakage = 7.07416e-05 W + Gate Leakage = 3.32268e-06 W + Runtime Dynamic = 4.07384 W + + Renaming Unit: + Area = 3.10545 mm^2 + Peak Dynamic = 8.42851 W + Subthreshold Leakage = 0.000515523 W + Gate Leakage = 6.13515e-05 W + Runtime Dynamic = 7.34458 W + + Int Front End RAT with 8 internal checkpoints: + Area = 1.53495 mm^2 + Peak Dynamic = 4.72424 W + Subthreshold Leakage = 0.00011933 W + Gate Leakage = 7.05732e-06 W + Runtime Dynamic = 4.69049 W + + FP Front End RAT with 8 internal checkpoints: + Area = 0.504792 mm^2 + Peak Dynamic = 2.16672 W + Subthreshold Leakage = 5.19429e-05 W + Gate Leakage = 3.17329e-06 W + Runtime Dynamic = 1.07269 W + + Free List: + Area = 0.439651 mm^2 + Peak Dynamic = 0.101065 W + Subthreshold Leakage = 1.12468e-05 W + Gate Leakage = 6.40491e-07 W + Runtime Dynamic = 0.186985 W + + FP Free List: + Area = 0.195722 mm^2 + Peak Dynamic = 0.0779159 W + Subthreshold Leakage = 7.82681e-06 W + Gate Leakage = 4.71127e-07 W + Runtime Dynamic = 0.035853 W + + Load Store Unit: + Area = 48.6535 mm^2 + Peak Dynamic = 8.72462 W + Subthreshold Leakage = 0.00719447 W + Gate Leakage = 0.000429292 W + Runtime Dynamic = 28.0438 W + + Data Cache: + Area = 38.3148 mm^2 + Peak Dynamic = 6.18063 W + Subthreshold Leakage = 0.00624994 W + Gate Leakage = 0.000342058 W + Runtime Dynamic = 25.966 W + + LoadQ: + Area = 2.76221 mm^2 + Peak Dynamic = 0.932355 W + Subthreshold Leakage = 0.000309677 W + Gate Leakage = 1.86125e-05 W + Runtime Dynamic = 0.466177 W + + StoreQ: + Area = 2.76221 mm^2 + Peak Dynamic = 0.932355 W + Subthreshold Leakage = 0.000309677 W + Gate Leakage = 1.86125e-05 W + Runtime Dynamic = 0.932355 W + + Memory Management Unit: + Area = 10.917 mm^2 + Peak Dynamic = 2.6167 W + Subthreshold Leakage = 0.00171591 W + Runtime Dynamic = 7.41009 W + + Itlb: + Area = 
3.51477 mm^2 + Peak Dynamic = 0.592001 W + Subthreshold Leakage = 0.000593841 W + Gate Leakage = 4.98191e-05 W + Runtime Dynamic = 2.36803 W + + Dtlb: + Area = 6.97195 mm^2 + Peak Dynamic = 1.00578 W + Subthreshold Leakage = 0.000796896 W + Gate Leakage = 6.81599e-05 W + Runtime Dynamic = 4.02315 W + + Execution Unit: + Area = 29.3084 mm^2 + Peak Dynamic = 23.1724 W + Subthreshold Leakage = 0.0199822 W + Runtime Dynamic = 19.2585 W + + Register Files: + Area = 9.92473 mm^2 + Peak Dynamic = 3.89176 W + Subthreshold Leakage = 0.000278344 W + Gate Leakage = 1.26653e-05 W + Runtime Dynamic = 1.64629 W + + Integer RF: + Area = 6.75967 mm^2 + Peak Dynamic = 2.2861 W + Subthreshold Leakage = 0.000172855 W + Gate Leakage = 7.93675e-06 W + Runtime Dynamic = 1.46712 W + + Floating Point RF: + Area = 3.16506 mm^2 + Peak Dynamic = 1.60566 W + Subthreshold Leakage = 0.000105489 W + Gate Leakage = 4.72854e-06 W + Runtime Dynamic = 0.179171 W + + Instruction Scheduler: + Area = 5.25992 mm^2 + Peak Dynamic = 3.23778 W + Subthreshold Leakage = 0.000280877 W + Gate Leakage = 1.5515e-05 W + Runtime Dynamic = 3.69049 W + + Instruction Window: + Area = 1.27449 mm^2 + Peak Dynamic = 1.1685 W + Subthreshold Leakage = 9.74811e-05 W + Gate Leakage = 6.3522e-06 W + Runtime Dynamic = 1.46828 W + + FP Instruction Window: + Area = 0.510329 mm^2 + Peak Dynamic = 0.68792 W + Subthreshold Leakage = 6.32652e-05 W + Gate Leakage = 4.28314e-06 W + Runtime Dynamic = 0.840837 W + + ROB: + Area = 3.47511 mm^2 + Peak Dynamic = 1.38136 W + Subthreshold Leakage = 0.000120131 W + Gate Leakage = 4.87965e-06 W + Runtime Dynamic = 1.38136 W + + Integer ALUs (Count: 4 ): + Area = 0.8736 mm^2 + Peak Dynamic = 4.23312 W + Subthreshold Leakage = 0.00403726 W + Gate Leakage = 0.000246721 W + Runtime Dynamic = 3.21343 W + + Floating Point Units (FPUs) (Count: 1 ): + Area = 12.705 mm^2 + Peak Dynamic = 3.52215 W + Subthreshold Leakage = 0.0146787 W + Gate Leakage = 0.000897034 W + Runtime Dynamic = 3.52215 W + + 
Results Broadcast Bus: + Area Overhead = 0.114821 mm^2 + Peak Dynamic = 6.929 W + Subthreshold Leakage = 0.000381852 W + Gate Leakage = 2.33354e-05 W + Runtime Dynamic = 5.82754 W + +***************************************************************************************** +L2 + Area = 135.613 mm^2 + Peak Dynamic = 4.76002 W + Subthreshold Leakage = 0.0784831 W + Gate Leakage = 0.000175445 W + Runtime Dynamic = 6.35901 W + +***************************************************************************************** +Second Level Directory + Area = 1.67179 mm^2 + Peak Dynamic = 0.826596 W + Subthreshold Leakage = 0.000382801 W + Gate Leakage = 3.06958e-05 W + Runtime Dynamic = 0.572134 W + +***************************************************************************************** +Memory Controller: + Area = 8.87813 mm^2 + Peak Dynamic = 6.8718 W + Subthreshold Leakage = 0.00112159 W + Gate Leakage = 6.81295e-05 W + Runtime Dynamic = 2.07121 W + + Front End Engine: + Area = 5.24545 mm^2 + Peak Dynamic = 3.02969 W + Subthreshold Leakage = 0.000282187 W + Gate Leakage = 1.68325e-05 W + Runtime Dynamic = 0.692367 W + + Transaction Engine: + Area = 1.50616 mm^2 + Peak Dynamic = 3.55926 W + Subthreshold Leakage = 0.000348029 W + Gate Leakage = 2.12684e-05 W + Runtime Dynamic = 1.06774 W + + PHY: + Area = 2.12653 mm^2 + Peak Dynamic = 0.282843 W + Subthreshold Leakage = 0.000491377 W + Gate Leakage = 3.00286e-05 W + Runtime Dynamic = 0.311102 W + +***************************************************************************************** +NOC + Area = 28.9377 mm^2 + Peak Dynamic = 14.9356 W + Subthreshold Leakage = 0.00280385 W + Gate Leakage = 0.000158855 W + Runtime Dynamic = 2.31478 W + + Router: + Area = 28.2648 mm^2 + Peak Dynamic = 7.32833 W + Subthreshold Leakage = 0.00189552 W + Gate Leakage = 0.000103346 W + Runtime Dynamic = 1.0469 W + + Virtual Channel Buffer: + Area = 16.9497 mm^2 + Peak Dynamic = 5.86693 W + Subthreshold Leakage = 0.00109244 W + Gate Leakage = 
3.51874e-05 W + Runtime Dynamic = 0.838133 W + + Crossbar: + Area = 0.357655 mm^2 + Peak Dynamic = 1.27997 W + Subthreshold Leakage = 0.000801415 W + Gate Leakage = 6.80527e-05 W + Runtime Dynamic = 0.182853 W + + Arbiter: + Peak Dynamic = 0.181431 W + Subthreshold Leakage = 1.65956e-06 W + Gate Leakage = 1.05559e-07 W + Runtime Dynamic = 0.0259187 W + + Per Router Links: + Area = 0.672964 mm^2 + Peak Dynamic = 7.60724 W + Subthreshold Leakage = 0.000908331 W + Gate Leakage = 5.55091e-05 W + Runtime Dynamic = 1.26787 W + +***************************************************************************************** diff --git a/unit_test/golden/Penryn.golden b/unit_test/golden/Penryn.golden new file mode 100644 index 0000000..53941fb --- /dev/null +++ b/unit_test/golden/Penryn.golden @@ -0,0 +1,355 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 45 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 2600 + +***************************************************************************************** +Processor: + Area = 92.0211 mm^2 + Peak Power = 45.9769 W + Total Leakage = 9.27425 W + Peak Dynamic = 36.7026 W + Subthreshold Leakage = 8.67012 W + Subthreshold Leakage with power gating = 4.25769 W + Gate Leakage = 0.604133 W + Runtime Dynamic = 36.1433 W + + Total Cores: 2 cores + Device Type= ITRS high performance device type + Area = 42.8547 mm^2 + Peak Dynamic = 29.0117 W + Subthreshold Leakage = 5.85366 W + Subthreshold Leakage with power gating = 2.66249 W + Gate Leakage = 0.54912 W + Runtime Dynamic = 22.9548 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 48.2462 mm^2 + Peak Dynamic = 3.63467 W + Subthreshold Leakage = 2.78837 W + Subthreshold 
Leakage with power gating = 1.58256 W + Gate Leakage = 0.0511935 W + Runtime Dynamic = 10.0867 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.920174 mm^2 + Peak Dynamic = 4.05622 W + Subthreshold Leakage = 0.0280913 W + Subthreshold Leakage with power gating = 0.0126411 W + Gate Leakage = 0.0038197 W + Runtime Dynamic = 3.10181 W + +***************************************************************************************** +Core: + Area = 21.4274 mm^2 + Peak Dynamic = 14.5059 W + Subthreshold Leakage = 2.92683 W + Subthreshold Leakage with power gating = 1.33124 W + Gate Leakage = 0.27456 W + Runtime Dynamic = 22.9548 W + + Instruction Fetch Unit: + Area = 3.38525 mm^2 + Peak Dynamic = 1.86632 W + Subthreshold Leakage = 0.374573 W + Subthreshold Leakage with power gating = 0.177025 W + Gate Leakage = 0.0281094 W + Runtime Dynamic = 2.53475 W + + Instruction Cache: + Area = 0.839673 mm^2 + Peak Dynamic = 0.35415 W + Subthreshold Leakage = 0.0685466 W + Subthreshold Leakage with power gating = 0.0347339 W + Gate Leakage = 0.00506472 W + Runtime Dynamic = 0.942467 W + + Branch Target Buffer: + Area = 0.478297 mm^2 + Peak Dynamic = 0.057097 W + Subthreshold Leakage = 0.0201964 W + Subthreshold Leakage with power gating = 0.0118989 W + Gate Leakage = 0.000985866 W + Runtime Dynamic = 0.228388 W + + Branch Predictor: + Area = 0.139692 mm^2 + Peak Dynamic = 0.049211 W + Subthreshold Leakage = 0.0128811 W + Subthreshold Leakage with power gating = 0.00749425 W + Gate Leakage = 0.00069906 W + Runtime Dynamic = 0.0440343 W + + Global Predictor: + Area = 0.0444051 mm^2 + Peak Dynamic = 0.0126821 W + Subthreshold Leakage = 0.00446496 W + Subthreshold Leakage with power gating = 0.00260093 W + Gate Leakage = 0.000231968 W + Runtime Dynamic = 0.0135045 W + + Local Predictor: + L1_Local Predictor: + Area = 0.0264222 mm^2 + Peak Dynamic = 0.0103349 W + Subthreshold Leakage = 0.00225884 W + Subthreshold Leakage with power gating = 
0.00131188 W + Gate Leakage = 0.00012879 W + Runtime Dynamic = 0.0112737 W + + L2_Local Predictor: + Area = 0.0135677 mm^2 + Peak Dynamic = 0.00527074 W + Subthreshold Leakage = 0.00114429 W + Subthreshold Leakage with power gating = 0.00066265 W + Gate Leakage = 6.76797e-05 W + Runtime Dynamic = 0.00575047 W + + Chooser: + Area = 0.0444051 mm^2 + Peak Dynamic = 0.0126821 W + Subthreshold Leakage = 0.00446496 W + Subthreshold Leakage with power gating = 0.00260093 W + Gate Leakage = 0.000231968 W + Runtime Dynamic = 0.0135045 W + + RAS: + Area = 0.0108915 mm^2 + Peak Dynamic = 0.00824119 W + Subthreshold Leakage = 0.000548044 W + Subthreshold Leakage with power gating = 0.000317857 W + Gate Leakage = 3.86548e-05 W + Runtime Dynamic = 1.04286e-06 W + + Instruction Buffer: + Area = 0.0245435 mm^2 + Peak Dynamic = 0.419404 W + Subthreshold Leakage = 0.000892643 W + Subthreshold Leakage with power gating = 0.000472166 W + Gate Leakage = 5.69632e-05 W + Runtime Dynamic = 0.279603 W + + Instruction Decoder: + Area = 1.85799 mm^2 + Peak Dynamic = 0.93267 W + Subthreshold Leakage = 0.260323 W + Subthreshold Leakage with power gating = 0.117145 W + Gate Leakage = 0.0185411 W + Runtime Dynamic = 0.93267 W + + Renaming Unit: + Area = 0.0743824 mm^2 + Peak Dynamic = 0.748825 W + Subthreshold Leakage = 0.0125017 W + Subthreshold Leakage with power gating = 0.00572037 W + Gate Leakage = 0.00283041 W + Runtime Dynamic = 0.916179 W + + Int Front End RAT with 1 internal checkpoints: + Area = 0.0199268 mm^2 + Peak Dynamic = 0.340754 W + Subthreshold Leakage = 0.000271788 W + Subthreshold Leakage with power gating = 0.000154073 W + Gate Leakage = 2.36369e-05 W + Runtime Dynamic = 0.338163 W + + FP Front End RAT with 1 internal checkpoints: + Area = 0.00940144 mm^2 + Peak Dynamic = 0.163467 W + Subthreshold Leakage = 0.000191945 W + Subthreshold Leakage with power gating = 0.000107691 W + Gate Leakage = 2.22664e-05 W + Runtime Dynamic = 0.0811039 W + + Free List: + Area = 0 mm^2 + 
Peak Dynamic = 0.0294337 W + Subthreshold Leakage = 0.000303974 W + Subthreshold Leakage with power gating = 0.000178315 W + Gate Leakage = 2.2857e-05 W + Runtime Dynamic = 0.0665704 W + + Load Store Unit: + Area = 4.9354 mm^2 + Peak Dynamic = 2.94502 W + Subthreshold Leakage = 0.3089 W + Subthreshold Leakage with power gating = 0.143598 W + Gate Leakage = 0.0321357 W + Runtime Dynamic = 6.83191 W + + Data Cache: + Area = 2.98993 mm^2 + Peak Dynamic = 2.18331 W + Subthreshold Leakage = 0.214899 W + Subthreshold Leakage with power gating = 0.101298 W + Gate Leakage = 0.0183794 W + Runtime Dynamic = 5.45053 W + + LoadQ: + Area = 0.290994 mm^2 + Peak Dynamic = 0.142027 W + Subthreshold Leakage = 0.0156608 W + Subthreshold Leakage with power gating = 0.00704736 W + Gate Leakage = 0.00143197 W + Runtime Dynamic = 0.142027 W + + StoreQ: + Area = 1.16484 mm^2 + Peak Dynamic = 0.565884 W + Subthreshold Leakage = 0.0666062 W + Subthreshold Leakage with power gating = 0.0299728 W + Gate Leakage = 0.00956261 W + Runtime Dynamic = 1.13177 W + + Memory Management Unit: + Area = 0.367881 mm^2 + Peak Dynamic = 0.250303 W + Subthreshold Leakage = 0.0327059 W + Subthreshold Leakage with power gating = 0.0147176 W + Runtime Dynamic = 1.19554 W + + Itlb: + Area = 0.0610812 mm^2 + Peak Dynamic = 0.021444 W + Subthreshold Leakage = 0.00561472 W + Subthreshold Leakage with power gating = 0.00252662 W + Gate Leakage = 0.000468934 W + Runtime Dynamic = 0.171553 W + + Dtlb: + Area = 0.261746 mm^2 + Peak Dynamic = 0.0943775 W + Subthreshold Leakage = 0.0153572 W + Subthreshold Leakage with power gating = 0.00691074 W + Gate Leakage = 0.0012438 W + Runtime Dynamic = 0.755023 W + + Execution Unit: + Area = 8.26776 mm^2 + Peak Dynamic = 8.69538 W + Subthreshold Leakage = 1.09452 W + Subthreshold Leakage with power gating = 0.49355 W + Runtime Dynamic = 11.4764 W + + Register Files: + Area = 0.583006 mm^2 + Peak Dynamic = 0.64366 W + Subthreshold Leakage = 0.00595865 W + Subthreshold Leakage 
with power gating = 0.00285503 W + Gate Leakage = 0.000518923 W + Runtime Dynamic = 0.331007 W + + Integer RF: + Area = 0.362766 mm^2 + Peak Dynamic = 0.544223 W + Subthreshold Leakage = 0.00360958 W + Subthreshold Leakage with power gating = 0.00168499 W + Gate Leakage = 0.000324933 W + Runtime Dynamic = 0.291548 W + + Floating Point RF: + Area = 0.22024 mm^2 + Peak Dynamic = 0.0994375 W + Subthreshold Leakage = 0.00234907 W + Subthreshold Leakage with power gating = 0.00117005 W + Gate Leakage = 0.00019399 W + Runtime Dynamic = 0.0394593 W + + Instruction Scheduler: + Area = 2.22554 mm^2 + Peak Dynamic = 3.35629 W + Subthreshold Leakage = 0.0772696 W + Subthreshold Leakage with power gating = 0.0356141 W + Gate Leakage = 0.00703556 W + Runtime Dynamic = 4.05601 W + + Instruction Window: + Area = 0.997596 mm^2 + Peak Dynamic = 1.40833 W + Subthreshold Leakage = 0.0468274 W + Subthreshold Leakage with power gating = 0.0210724 W + Gate Leakage = 0.00439376 W + Runtime Dynamic = 1.84106 W + + FP Instruction Window: + Area = 0.393359 mm^2 + Peak Dynamic = 0.86449 W + Subthreshold Leakage = 0.0205822 W + Subthreshold Leakage with power gating = 0.00926197 W + Gate Leakage = 0.00193284 W + Runtime Dynamic = 1.13148 W + + ROB: + Area = 0.834587 mm^2 + Peak Dynamic = 1.08347 W + Subthreshold Leakage = 0.00985999 W + Subthreshold Leakage with power gating = 0.00527977 W + Gate Leakage = 0.000708958 W + Runtime Dynamic = 1.08347 W + + Integer ALUs (Count: 6 ): + Area = 0.47087 mm^2 + Peak Dynamic = 1.58135 W + Subthreshold Leakage = 0.23639 W + Subthreshold Leakage with power gating = 0.106375 W + Gate Leakage = 0.0221076 W + Runtime Dynamic = 0.825862 W + + Floating Point Units (FPUs) (Count: 2 ): + Area = 4.6585 mm^2 + Peak Dynamic = 0.51663 W + Subthreshold Leakage = 0.584673 W + Subthreshold Leakage with power gating = 0.263103 W + Gate Leakage = 0.0546797 W + Runtime Dynamic = 0.966618 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.235435 mm^2 + Peak Dynamic = 
0.193324 W + Subthreshold Leakage = 0.118195 W + Subthreshold Leakage with power gating = 0.0531876 W + Gate Leakage = 0.0110538 W + Runtime Dynamic = 1.14807 W + + Results Broadcast Bus: + Area Overhead = 0.0493523 mm^2 + Peak Dynamic = 2.18897 W + Subthreshold Leakage = 0.0602996 W + Subthreshold Leakage with power gating = 0.0271348 W + Gate Leakage = 0.00563933 W + Runtime Dynamic = 3.71849 W + +***************************************************************************************** +L2 + Area = 48.2462 mm^2 + Peak Dynamic = 3.63467 W + Subthreshold Leakage = 2.78837 W + Subthreshold Leakage with power gating = 1.58256 W + Gate Leakage = 0.0511935 W + Runtime Dynamic = 10.0867 W + +***************************************************************************************** +BUSES + Area = 0.920174 mm^2 + Peak Dynamic = 4.05622 W + Subthreshold Leakage = 0.0280913 W + Subthreshold Leakage with power gating = 0.0126411 W + Gate Leakage = 0.0038197 W + Runtime Dynamic = 3.10181 W + + Bus: + Area = 0.920174 mm^2 + Peak Dynamic = 4.05622 W + Subthreshold Leakage = 0.0280913 W + Subthreshold Leakage with power gating = 0.0126411 W + Gate Leakage = 0.0038197 W + Runtime Dynamic = 3.10181 W + +***************************************************************************************** diff --git a/unit_test/golden/T1.golden b/unit_test/golden/T1.golden new file mode 100644 index 0000000..9e8c2f7 --- /dev/null +++ b/unit_test/golden/T1.golden @@ -0,0 +1,333 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 90 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 1200 + +***************************************************************************************** +Processor: + Area = 269.699 mm^2 + Peak Power = 52.5916 W + Total Leakage = 9.99442 W + Peak Dynamic = 42.5972 W + Subthreshold Leakage = 8.87864 W + Subthreshold Leakage with power gating = 4.30074 W + Gate Leakage = 1.11578 W + Runtime Dynamic = 38.1762 W + + Total Cores: 8 cores + Device Type= ITRS high performance device type + Area = 107.922 mm^2 + Peak Dynamic = 25.2367 W + Subthreshold Leakage = 5.37285 W + Subthreshold Leakage with power gating = 2.44337 W + Gate Leakage = 0.747726 W + Runtime Dynamic = 11.8965 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 107.195 mm^2 + Peak Dynamic = 5.30732 W + Subthreshold Leakage = 2.38347 W + Subthreshold Leakage with power gating = 1.3549 W + Gate Leakage = 0.194952 W + Runtime Dynamic = 2.0874 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 14.0901 mm^2 + Peak Dynamic = 3.5751 W + Subthreshold Leakage = 0.287284 W + Subthreshold Leakage with power gating = 0.129278 W + Gate Leakage = 0.043831 W + Runtime Dynamic = 15.9829 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 9.28967 mm^2 + Peak Dynamic = 3.61212 W + Subthreshold Leakage = 0.495718 W + Subthreshold Leakage with power gating = 0.220003 W + Gate Leakage = 0.0861039 W + Runtime Dynamic = 2.60072 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 31.2015 mm^2 + Peak Dynamic = 4.86589 W + Subthreshold Leakage = 0.339319 W + Subthreshold Leakage with power gating = 0.153191 W + Gate 
Leakage = 0.0431706 W + Runtime Dynamic = 5.60858 W + +***************************************************************************************** +Core: + Area = 13.4903 mm^2 + Peak Dynamic = 3.15459 W + Subthreshold Leakage = 0.671606 W + Subthreshold Leakage with power gating = 0.305421 W + Gate Leakage = 0.0934657 W + Runtime Dynamic = 11.8965 W + + Instruction Fetch Unit: + Area = 4.52098 mm^2 + Peak Dynamic = 0.820064 W + Subthreshold Leakage = 0.0730403 W + Subthreshold Leakage with power gating = 0.0344087 W + Gate Leakage = 0.0120201 W + Runtime Dynamic = 2.52268 W + + Instruction Cache: + Area = 4.33166 mm^2 + Peak Dynamic = 0.566876 W + Subthreshold Leakage = 0.0620423 W + Subthreshold Leakage with power gating = 0.0294395 W + Gate Leakage = 0.00909673 W + Runtime Dynamic = 0.723821 W + + Instruction Buffer: + Area = 0.0101113 mm^2 + Peak Dynamic = 0.0128945 W + Subthreshold Leakage = 0.000200376 W + Subthreshold Leakage with power gating = 0.000110266 W + Gate Leakage = 2.94563e-05 W + Runtime Dynamic = 0.103156 W + + Instruction Decoder: + Area = 0.0229327 mm^2 + Peak Dynamic = 0.169467 W + Subthreshold Leakage = 0.00259055 W + Subthreshold Leakage with power gating = 0.00116575 W + Gate Leakage = 0.000252139 W + Runtime Dynamic = 1.35574 W + + Load Store Unit: + Area = 1.82703 mm^2 + Peak Dynamic = 0.328253 W + Subthreshold Leakage = 0.0333486 W + Subthreshold Leakage with power gating = 0.0159146 W + Gate Leakage = 0.00541692 W + Runtime Dynamic = 1.46054 W + + Data Cache: + Area = 1.22746 mm^2 + Peak Dynamic = 0.198903 W + Subthreshold Leakage = 0.0214063 W + Subthreshold Leakage with power gating = 0.0105406 W + Gate Leakage = 0.00230316 W + Runtime Dynamic = 0.18419 W + + Load/Store Queue: + Area = 0.291398 mm^2 + Peak Dynamic = 0.0585241 W + Subthreshold Leakage = 0.00373526 W + Subthreshold Leakage with power gating = 0.00168087 W + Gate Leakage = 0.000472027 W + Runtime Dynamic = 0.936385 W + + Memory Management Unit: + Area = 1.37839 mm^2 + Peak 
Dynamic = 0.244766 W + Subthreshold Leakage = 0.0239568 W + Subthreshold Leakage with power gating = 0.0107806 W + Runtime Dynamic = 1.12072 W + + Itlb: + Area = 0.611052 mm^2 + Peak Dynamic = 0.0338501 W + Subthreshold Leakage = 0.00787491 W + Subthreshold Leakage with power gating = 0.00354371 W + Gate Leakage = 0.00147216 W + Runtime Dynamic = 0.135405 W + + Dtlb: + Area = 0.611052 mm^2 + Peak Dynamic = 0.0338501 W + Subthreshold Leakage = 0.00787491 W + Subthreshold Leakage with power gating = 0.00354371 W + Gate Leakage = 0.00147216 W + Runtime Dynamic = 0.135405 W + + Execution Unit: + Area = 2.46158 mm^2 + Peak Dynamic = 1.76151 W + Subthreshold Leakage = 0.209828 W + Subthreshold Leakage with power gating = 0.0951727 W + Runtime Dynamic = 6.7926 W + + Register Files: + Area = 0.41989 mm^2 + Peak Dynamic = 0.141236 W + Subthreshold Leakage = 0.00672375 W + Subthreshold Leakage with power gating = 0.00377566 W + Gate Leakage = 0.000471581 W + Runtime Dynamic = 0.459638 W + + Integer RF: + Area = 0.122578 mm^2 + Peak Dynamic = 0.100597 W + Subthreshold Leakage = 0.000506006 W + Subthreshold Leakage with power gating = 0.000264706 W + Gate Leakage = 5.15702e-05 W + Runtime Dynamic = 0.448321 W + + Floating Point RF: + Area = 0.122578 mm^2 + Peak Dynamic = 0.0406385 W + Subthreshold Leakage = 0.000506006 W + Subthreshold Leakage with power gating = 0.000264706 W + Gate Leakage = 5.15702e-05 W + Runtime Dynamic = 0.0112386 W + + Register Windows: + Area = 0.174734 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00571174 W + Subthreshold Leakage with power gating = 0.00324625 W + Gate Leakage = 0.000368441 W + Runtime Dynamic = 7.92626e-05 W + + Instruction Scheduler: + Area = 0.0479292 mm^2 + Peak Dynamic = 0.0511572 W + Subthreshold Leakage = 0.000644897 W + Subthreshold Leakage with power gating = 0.000290204 W + Gate Leakage = 9.05719e-05 W + Runtime Dynamic = 0.420611 W + + Instruction Window: + Area = 0.0479292 mm^2 + Peak Dynamic = 0.0511572 W + 
Subthreshold Leakage = 0.000644897 W + Subthreshold Leakage with power gating = 0.000290204 W + Gate Leakage = 9.05719e-05 W + Runtime Dynamic = 0.420611 W + + Integer ALUs (Count: 1 ): + Area = 0.16016 mm^2 + Peak Dynamic = 0.305285 W + Subthreshold Leakage = 0.0321485 W + Subthreshold Leakage with power gating = 0.0144668 W + Gate Leakage = 0.00411202 W + Runtime Dynamic = 2.71365 W + + Floating Point Units (FPUs) (Count: 0.125 ): + Area = 1.16463 mm^2 + Peak Dynamic = 0.0508808 W + Subthreshold Leakage = 0.0584431 W + Subthreshold Leakage with power gating = 0.0262994 W + Gate Leakage = 0.00747528 W + Runtime Dynamic = 0.101762 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.48048 mm^2 + Peak Dynamic = 0.339206 W + Subthreshold Leakage = 0.0964456 W + Subthreshold Leakage with power gating = 0.0434005 W + Gate Leakage = 0.0123361 W + Runtime Dynamic = 0.678411 W + + Results Broadcast Bus: + Area Overhead = 0.0322089 mm^2 + Peak Dynamic = 0.618773 W + Subthreshold Leakage = 0.00721544 W + Subthreshold Leakage with power gating = 0.00324695 W + Gate Leakage = 0.000922905 W + Runtime Dynamic = 1.19466 W + +***************************************************************************************** +L2 + Area = 26.7988 mm^2 + Peak Dynamic = 1.32683 W + Subthreshold Leakage = 0.595868 W + Subthreshold Leakage with power gating = 0.338725 W + Gate Leakage = 0.0487379 W + Runtime Dynamic = 2.0874 W + +***************************************************************************************** +First Level Directory + Area = 3.52251 mm^2 + Peak Dynamic = 0.893775 W + Subthreshold Leakage = 0.0718209 W + Subthreshold Leakage with power gating = 0.0323194 W + Gate Leakage = 0.0109578 W + Runtime Dynamic = 15.9829 W + +***************************************************************************************** +Memory Controller: + Area = 7.80038 mm^2 + Peak Dynamic = 1.21647 W + Subthreshold Leakage = 0.0848297 W + Subthreshold Leakage with power gating = 0.0382977 W + Gate 
Leakage = 0.0107926 W + Runtime Dynamic = 5.60858 W + + Front End Engine: + Area = 0.570912 mm^2 + Peak Dynamic = 0.134991 W + Subthreshold Leakage = 0.00501617 W + Subthreshold Leakage with power gating = 0.00238159 W + Gate Leakage = 0.000583944 W + Runtime Dynamic = 0.593972 W + + Transaction Engine: + Area = 2.59502 mm^2 + Peak Dynamic = 0.569482 W + Subthreshold Leakage = 0.0286491 W + Subthreshold Leakage with power gating = 0.0128921 W + Gate Leakage = 0.00366442 W + Runtime Dynamic = 2.50577 W + + PHY: + Area = 4.63445 mm^2 + Peak Dynamic = 0.512 W + Subthreshold Leakage = 0.0511644 W + Subthreshold Leakage with power gating = 0.023024 W + Gate Leakage = 0.00654429 W + Runtime Dynamic = 2.50885 W + +***************************************************************************************** +NOC + Area = 9.28967 mm^2 + Peak Dynamic = 3.61212 W + Subthreshold Leakage = 0.495718 W + Subthreshold Leakage with power gating = 0.220003 W + Gate Leakage = 0.0861039 W + Runtime Dynamic = 2.60072 W + + Router: + Area = 4.64483 mm^2 + Peak Dynamic = 1.80606 W + Subthreshold Leakage = 0.247859 W + Subthreshold Leakage with power gating = 0.110001 W + Gate Leakage = 0.0430519 W + Runtime Dynamic = 2.60072 W + + Virtual Channel Buffer: + Area = 1.2867 mm^2 + Peak Dynamic = 0.317135 W + Subthreshold Leakage = 0.00389846 W + Subthreshold Leakage with power gating = 0.000219289 W + Gate Leakage = 0.000508964 W + Runtime Dynamic = 0.456674 W + + Crossbar: + Area = 1.35717 mm^2 + Peak Dynamic = 1.47654 W + Subthreshold Leakage = 0.243949 W + Subthreshold Leakage with power gating = 0.109777 W + Gate Leakage = 0.0425414 W + Runtime Dynamic = 2.12622 W + + Arbiter: + Peak Dynamic = 0.0123809 W + Subthreshold Leakage = 1.15783e-05 W + Subthreshold Leakage with power gating = 5.21022e-06 W + Gate Leakage = 1.54103e-06 W + Runtime Dynamic = 0.0178284 W + +***************************************************************************************** diff --git 
a/unit_test/golden/T1_DC_64.golden b/unit_test/golden/T1_DC_64.golden new file mode 100644 index 0000000..669dddf --- /dev/null +++ b/unit_test/golden/T1_DC_64.golden @@ -0,0 +1,299 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 3500 + +***************************************************************************************** +Processor: + Area = 324.938 mm^2 + Peak Power = 120.932 W + Total Leakage = 29.8881 W + Peak Dynamic = 91.0444 W + Subthreshold Leakage = 29.5655 W + Subthreshold Leakage with power gating = 14.458 W + Gate Leakage = 0.322558 W + Runtime Dynamic = 9.51724 W + + Total Cores: 64 cores + Device Type= ITRS high performance device type + Area = 87.2645 mm^2 + Peak Dynamic = 45.0372 W + Subthreshold Leakage = 8.47234 W + Subthreshold Leakage with power gating = 3.86197 W + Gate Leakage = 0.0864485 W + Runtime Dynamic = 5.79568 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 162.449 mm^2 + Peak Dynamic = 22.3605 W + Subthreshold Leakage = 9.80658 W + Subthreshold Leakage with power gating = 5.3988 W + Gate Leakage = 0.100222 W + Runtime Dynamic = 0.691017 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 22.1914 mm^2 + Peak Dynamic = 0.494266 W + Subthreshold Leakage = 1.70833 W + Subthreshold Leakage with power gating = 0.944499 W + Gate Leakage = 0.0156025 W + Runtime Dynamic = 0.191199 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 53.0328 mm^2 + Peak Dynamic = 23.1524 W + Subthreshold Leakage = 9.57824 W + Subthreshold Leakage with power gating = 4.2527 W + Gate Leakage = 0.120285 W 
+ Runtime Dynamic = 2.83934 W + +***************************************************************************************** +Core: + Area = 1.36351 mm^2 + Peak Dynamic = 0.703707 W + Subthreshold Leakage = 0.13238 W + Subthreshold Leakage with power gating = 0.0603433 W + Gate Leakage = 0.00135076 W + Runtime Dynamic = 5.79568 W + + Instruction Fetch Unit: + Area = 0.166429 mm^2 + Peak Dynamic = 0.148963 W + Subthreshold Leakage = 0.0151819 W + Subthreshold Leakage with power gating = 0.00721124 W + Gate Leakage = 0.000159011 W + Runtime Dynamic = 0.749915 W + + Instruction Cache: + Area = 0.155222 mm^2 + Peak Dynamic = 0.110246 W + Subthreshold Leakage = 0.0126344 W + Subthreshold Leakage with power gating = 0.00606006 W + Gate Leakage = 0.00010745 W + Runtime Dynamic = 0.0833082 W + + Instruction Buffer: + Area = 0.000553402 mm^2 + Peak Dynamic = 0.00237388 W + Subthreshold Leakage = 4.78586e-05 W + Subthreshold Leakage with power gating = 2.63375e-05 W + Gate Leakage = 4.16791e-07 W + Runtime Dynamic = 0.0189911 W + + Instruction Decoder: + Area = 0.00131543 mm^2 + Peak Dynamic = 0.0246042 W + Subthreshold Leakage = 0.000538954 W + Subthreshold Leakage with power gating = 0.00024253 W + Gate Leakage = 3.91915e-06 W + Runtime Dynamic = 0.196833 W + + Load Store Unit: + Area = 0.108159 mm^2 + Peak Dynamic = 0.0709358 W + Subthreshold Leakage = 0.00903484 W + Subthreshold Leakage with power gating = 0.00428255 W + Gate Leakage = 0.000106881 W + Runtime Dynamic = 0.708347 W + + Data Cache: + Area = 0.0717513 mm^2 + Peak Dynamic = 0.0458425 W + Subthreshold Leakage = 0.00598917 W + Subthreshold Leakage with power gating = 0.00291199 W + Gate Leakage = 4.9393e-05 W + Runtime Dynamic = 0.0438993 W + + Load/Store Queue: + Area = 0.018086 mm^2 + Peak Dynamic = 0.0133541 W + Subthreshold Leakage = 0.00108498 W + Subthreshold Leakage with power gating = 0.00048824 W + Gate Leakage = 1.02625e-05 W + Runtime Dynamic = 0.213666 W + + Memory Management Unit: + Area = 0.0335457 
mm^2 + Peak Dynamic = 0.036723 W + Subthreshold Leakage = 0.00356461 W + Subthreshold Leakage with power gating = 0.00160408 W + Runtime Dynamic = 1.15646 W + + Itlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Dtlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Execution Unit: + Area = 0.26249 mm^2 + Peak Dynamic = 0.447085 W + Subthreshold Leakage = 0.0357461 W + Subthreshold Leakage with power gating = 0.0162617 W + Runtime Dynamic = 3.18096 W + + Register Files: + Area = 0.0242216 mm^2 + Peak Dynamic = 0.0313991 W + Subthreshold Leakage = 0.00159238 W + Subthreshold Leakage with power gating = 0.00089251 W + Gate Leakage = 8.85416e-06 W + Runtime Dynamic = 0.101531 W + + Integer RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.0223217 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.0990301 W + + Floating Point RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.00907735 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.00248242 W + + Register Windows: + Area = 0.0109615 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00131867 W + Subthreshold Leakage with power gating = 0.000754893 W + Gate Leakage = 6.78717e-06 W + Runtime Dynamic = 1.84068e-05 W + + Instruction Scheduler: + Area = 0.00295995 mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Instruction Window: + Area = 0.00295995 
mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Integer ALUs (Count: 1 ): + Area = 0.0384544 mm^2 + Peak Dynamic = 0.0946992 W + Subthreshold Leakage = 0.00667865 W + Subthreshold Leakage with power gating = 0.00300539 W + Gate Leakage = 6.39207e-05 W + Runtime Dynamic = 0.841771 W + + Floating Point Units (FPUs) (Count: 0.125 ): + Area = 0.0695899 mm^2 + Peak Dynamic = 0.0157832 W + Subthreshold Leakage = 0.00302155 W + Subthreshold Leakage with power gating = 0.0013597 W + Gate Leakage = 2.89189e-05 W + Runtime Dynamic = 0.0315664 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.115363 mm^2 + Peak Dynamic = 0.105221 W + Subthreshold Leakage = 0.020036 W + Subthreshold Leakage with power gating = 0.00901618 W + Gate Leakage = 0.000191762 W + Runtime Dynamic = 0.210443 W + + Results Broadcast Bus: + Area Overhead = 0.00256269 mm^2 + Peak Dynamic = 0.146966 W + Subthreshold Leakage = 0.00227703 W + Subthreshold Leakage with power gating = 0.00102466 W + Gate Leakage = 2.17932e-05 W + Runtime Dynamic = 0.28494 W + +***************************************************************************************** +L2 + Area = 2.53827 mm^2 + Peak Dynamic = 0.349383 W + Subthreshold Leakage = 0.153228 W + Subthreshold Leakage with power gating = 0.0843563 W + Gate Leakage = 0.00156597 W + Runtime Dynamic = 0.691017 W + +***************************************************************************************** +Second Level Directory + Area = 2.77392 mm^2 + Peak Dynamic = 0.0617833 W + Subthreshold Leakage = 0.213541 W + Subthreshold Leakage with power gating = 0.118062 W + Gate Leakage = 0.00195031 W + Runtime Dynamic = 0.191199 W + +***************************************************************************************** +NOC + Area = 53.0328 mm^2 + Peak Dynamic = 23.1524 W + Subthreshold Leakage = 9.57824 W + Subthreshold Leakage 
with power gating = 4.2527 W + Gate Leakage = 0.120285 W + Runtime Dynamic = 2.83934 W + + Router: + Area = 0.602135 mm^2 + Peak Dynamic = 0.231369 W + Subthreshold Leakage = 0.127816 W + Subthreshold Leakage with power gating = 0.0566188 W + Gate Leakage = 0.00166364 W + Runtime Dynamic = 1.66586 W + + Virtual Channel Buffer: + Area = 0.165683 mm^2 + Peak Dynamic = 0.0507616 W + Subthreshold Leakage = 0.00249596 W + Subthreshold Leakage with power gating = 0.000224636 W + Gate Leakage = 2.45875e-05 W + Runtime Dynamic = 0.365484 W + + Crossbar: + Area = 0.160976 mm^2 + Peak Dynamic = 0.179891 W + Subthreshold Leakage = 0.12532 W + Subthreshold Leakage with power gating = 0.056394 W + Gate Leakage = 0.00163905 W + Runtime Dynamic = 1.29522 W + + Arbiter: + Peak Dynamic = 0.000716052 W + Subthreshold Leakage = 3.67148e-07 W + Subthreshold Leakage with power gating = 1.65217e-07 W + Gate Leakage = 3.86991e-09 W + Runtime Dynamic = 0.00515558 W + + Per Router Links: + Area = 0.226503 mm^2 + Peak Dynamic = 0.130387 W + Subthreshold Leakage = 0.0218437 W + Subthreshold Leakage with power gating = 0.00982965 W + Gate Leakage = 0.00021581 W + Runtime Dynamic = 1.17349 W + +***************************************************************************************** diff --git a/unit_test/golden/T1_SBT_64.golden b/unit_test/golden/T1_SBT_64.golden new file mode 100644 index 0000000..c251262 --- /dev/null +++ b/unit_test/golden/T1_SBT_64.golden @@ -0,0 +1,281 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 3500 + +***************************************************************************************** +Processor: + Area = 302.219 mm^2 + Peak Power = 118.359 W + Total Leakage = 28.1127 W + Peak Dynamic = 90.2464 W + Subthreshold Leakage = 27.8063 W + Subthreshold Leakage with power gating = 13.4906 W + Gate Leakage = 0.306453 W + Runtime Dynamic = 9.28332 W + + Total Cores: 64 cores + Device Type= ITRS high performance device type + Area = 87.2645 mm^2 + Peak Dynamic = 45.0372 W + Subthreshold Leakage = 8.47234 W + Subthreshold Leakage with power gating = 3.86197 W + Gate Leakage = 0.0864485 W + Runtime Dynamic = 5.79568 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 162.449 mm^2 + Peak Dynamic = 22.3605 W + Subthreshold Leakage = 9.80658 W + Subthreshold Leakage with power gating = 5.3988 W + Gate Leakage = 0.100222 W + Runtime Dynamic = 0.691017 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 52.5051 mm^2 + Peak Dynamic = 22.8486 W + Subthreshold Leakage = 9.52735 W + Subthreshold Leakage with power gating = 4.2298 W + Gate Leakage = 0.119782 W + Runtime Dynamic = 2.79662 W + +***************************************************************************************** +Core: + Area = 1.36351 mm^2 + Peak Dynamic = 0.703707 W + Subthreshold Leakage = 0.13238 W + Subthreshold Leakage with power gating = 0.0603433 W + Gate Leakage = 0.00135076 W + Runtime Dynamic = 5.79568 W + + Instruction Fetch Unit: + Area = 0.166429 mm^2 + Peak Dynamic = 0.148963 W + Subthreshold Leakage = 0.0151819 W + Subthreshold Leakage with power gating = 0.00721124 W + Gate Leakage = 0.000159011 W + 
Runtime Dynamic = 0.749915 W + + Instruction Cache: + Area = 0.155222 mm^2 + Peak Dynamic = 0.110246 W + Subthreshold Leakage = 0.0126344 W + Subthreshold Leakage with power gating = 0.00606006 W + Gate Leakage = 0.00010745 W + Runtime Dynamic = 0.0833082 W + + Instruction Buffer: + Area = 0.000553402 mm^2 + Peak Dynamic = 0.00237388 W + Subthreshold Leakage = 4.78586e-05 W + Subthreshold Leakage with power gating = 2.63375e-05 W + Gate Leakage = 4.16791e-07 W + Runtime Dynamic = 0.0189911 W + + Instruction Decoder: + Area = 0.00131543 mm^2 + Peak Dynamic = 0.0246042 W + Subthreshold Leakage = 0.000538954 W + Subthreshold Leakage with power gating = 0.00024253 W + Gate Leakage = 3.91915e-06 W + Runtime Dynamic = 0.196833 W + + Load Store Unit: + Area = 0.108159 mm^2 + Peak Dynamic = 0.0709358 W + Subthreshold Leakage = 0.00903484 W + Subthreshold Leakage with power gating = 0.00428255 W + Gate Leakage = 0.000106881 W + Runtime Dynamic = 0.708347 W + + Data Cache: + Area = 0.0717513 mm^2 + Peak Dynamic = 0.0458425 W + Subthreshold Leakage = 0.00598917 W + Subthreshold Leakage with power gating = 0.00291199 W + Gate Leakage = 4.9393e-05 W + Runtime Dynamic = 0.0438993 W + + Load/Store Queue: + Area = 0.018086 mm^2 + Peak Dynamic = 0.0133541 W + Subthreshold Leakage = 0.00108498 W + Subthreshold Leakage with power gating = 0.00048824 W + Gate Leakage = 1.02625e-05 W + Runtime Dynamic = 0.213666 W + + Memory Management Unit: + Area = 0.0335457 mm^2 + Peak Dynamic = 0.036723 W + Subthreshold Leakage = 0.00356461 W + Subthreshold Leakage with power gating = 0.00160408 W + Runtime Dynamic = 1.15646 W + + Itlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Dtlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 
0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Execution Unit: + Area = 0.26249 mm^2 + Peak Dynamic = 0.447085 W + Subthreshold Leakage = 0.0357461 W + Subthreshold Leakage with power gating = 0.0162617 W + Runtime Dynamic = 3.18096 W + + Register Files: + Area = 0.0242216 mm^2 + Peak Dynamic = 0.0313991 W + Subthreshold Leakage = 0.00159238 W + Subthreshold Leakage with power gating = 0.00089251 W + Gate Leakage = 8.85416e-06 W + Runtime Dynamic = 0.101531 W + + Integer RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.0223217 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.0990301 W + + Floating Point RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.00907735 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.00248242 W + + Register Windows: + Area = 0.0109615 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00131867 W + Subthreshold Leakage with power gating = 0.000754893 W + Gate Leakage = 6.78717e-06 W + Runtime Dynamic = 1.84068e-05 W + + Instruction Scheduler: + Area = 0.00295995 mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Instruction Window: + Area = 0.00295995 mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Integer ALUs (Count: 1 ): + Area = 0.0384544 mm^2 + Peak Dynamic = 0.0946992 W + Subthreshold Leakage = 0.00667865 W + Subthreshold Leakage with power gating = 0.00300539 W + Gate Leakage = 6.39207e-05 W + Runtime Dynamic = 0.841771 W + + Floating Point Units (FPUs) (Count: 0.125 ): + Area = 0.0695899 mm^2 + Peak Dynamic = 
0.0157832 W + Subthreshold Leakage = 0.00302155 W + Subthreshold Leakage with power gating = 0.0013597 W + Gate Leakage = 2.89189e-05 W + Runtime Dynamic = 0.0315664 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.115363 mm^2 + Peak Dynamic = 0.105221 W + Subthreshold Leakage = 0.020036 W + Subthreshold Leakage with power gating = 0.00901618 W + Gate Leakage = 0.000191762 W + Runtime Dynamic = 0.210443 W + + Results Broadcast Bus: + Area Overhead = 0.00256269 mm^2 + Peak Dynamic = 0.146966 W + Subthreshold Leakage = 0.00227703 W + Subthreshold Leakage with power gating = 0.00102466 W + Gate Leakage = 2.17932e-05 W + Runtime Dynamic = 0.28494 W + +***************************************************************************************** +L2 + Area = 2.53827 mm^2 + Peak Dynamic = 0.349383 W + Subthreshold Leakage = 0.153228 W + Subthreshold Leakage with power gating = 0.0843563 W + Gate Leakage = 0.00156597 W + Runtime Dynamic = 0.691017 W + +***************************************************************************************** +NOC + Area = 52.5051 mm^2 + Peak Dynamic = 22.8486 W + Subthreshold Leakage = 9.52735 W + Subthreshold Leakage with power gating = 4.2298 W + Gate Leakage = 0.119782 W + Runtime Dynamic = 2.79662 W + + Router: + Area = 0.602135 mm^2 + Peak Dynamic = 0.231369 W + Subthreshold Leakage = 0.127816 W + Subthreshold Leakage with power gating = 0.0566188 W + Gate Leakage = 0.00166364 W + Runtime Dynamic = 1.66586 W + + Virtual Channel Buffer: + Area = 0.165683 mm^2 + Peak Dynamic = 0.0507616 W + Subthreshold Leakage = 0.00249596 W + Subthreshold Leakage with power gating = 0.000224636 W + Gate Leakage = 2.45875e-05 W + Runtime Dynamic = 0.365484 W + + Crossbar: + Area = 0.160976 mm^2 + Peak Dynamic = 0.179891 W + Subthreshold Leakage = 0.12532 W + Subthreshold Leakage with power gating = 0.056394 W + Gate Leakage = 0.00163905 W + Runtime Dynamic = 1.29522 W + + Arbiter: + Peak Dynamic = 0.000716052 W + Subthreshold Leakage = 3.67148e-07 W + 
Subthreshold Leakage with power gating = 1.65217e-07 W + Gate Leakage = 3.86991e-09 W + Runtime Dynamic = 0.00515558 W + + Per Router Links: + Area = 0.218257 mm^2 + Peak Dynamic = 0.125641 W + Subthreshold Leakage = 0.0210485 W + Subthreshold Leakage with power gating = 0.00947181 W + Gate Leakage = 0.000207954 W + Runtime Dynamic = 1.13077 W + +***************************************************************************************** diff --git a/unit_test/golden/T1_ST_64.golden b/unit_test/golden/T1_ST_64.golden new file mode 100644 index 0000000..f9e3789 --- /dev/null +++ b/unit_test/golden/T1_ST_64.golden @@ -0,0 +1,299 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 3500 + +***************************************************************************************** +Processor: + Area = 352.401 mm^2 + Peak Power = 145.772 W + Total Leakage = 31.3899 W + Peak Dynamic = 114.383 W + Subthreshold Leakage = 31.0481 W + Subthreshold Leakage with power gating = 14.9494 W + Gate Leakage = 0.341863 W + Runtime Dynamic = 79.2128 W + + Total Cores: 64 cores + Device Type= ITRS high performance device type + Area = 87.2645 mm^2 + Peak Dynamic = 45.0372 W + Subthreshold Leakage = 8.47234 W + Subthreshold Leakage with power gating = 3.86197 W + Gate Leakage = 0.0864485 W + Runtime Dynamic = 5.79568 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 162.449 mm^2 + Peak Dynamic = 22.3605 W + Subthreshold Leakage = 9.80658 W + Subthreshold Leakage with power gating = 5.3988 W + Gate Leakage = 0.100222 W + Runtime Dynamic = 0.691017 W + + Total Second Level Directory: + Device Type= ITRS high performance 
device type + Area = 49.0405 mm^2 + Peak Dynamic = 23.479 W + Subthreshold Leakage = 3.1317 W + Subthreshold Leakage with power gating = 1.40926 W + Gate Leakage = 0.034322 W + Runtime Dynamic = 69.8371 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 53.6467 mm^2 + Peak Dynamic = 23.5058 W + Subthreshold Leakage = 9.63744 W + Subthreshold Leakage with power gating = 4.27934 W + Gate Leakage = 0.12087 W + Runtime Dynamic = 2.88904 W + +***************************************************************************************** +Core: + Area = 1.36351 mm^2 + Peak Dynamic = 0.703707 W + Subthreshold Leakage = 0.13238 W + Subthreshold Leakage with power gating = 0.0603433 W + Gate Leakage = 0.00135076 W + Runtime Dynamic = 5.79568 W + + Instruction Fetch Unit: + Area = 0.166429 mm^2 + Peak Dynamic = 0.148963 W + Subthreshold Leakage = 0.0151819 W + Subthreshold Leakage with power gating = 0.00721124 W + Gate Leakage = 0.000159011 W + Runtime Dynamic = 0.749915 W + + Instruction Cache: + Area = 0.155222 mm^2 + Peak Dynamic = 0.110246 W + Subthreshold Leakage = 0.0126344 W + Subthreshold Leakage with power gating = 0.00606006 W + Gate Leakage = 0.00010745 W + Runtime Dynamic = 0.0833082 W + + Instruction Buffer: + Area = 0.000553402 mm^2 + Peak Dynamic = 0.00237388 W + Subthreshold Leakage = 4.78586e-05 W + Subthreshold Leakage with power gating = 2.63375e-05 W + Gate Leakage = 4.16791e-07 W + Runtime Dynamic = 0.0189911 W + + Instruction Decoder: + Area = 0.00131543 mm^2 + Peak Dynamic = 0.0246042 W + Subthreshold Leakage = 0.000538954 W + Subthreshold Leakage with power gating = 0.00024253 W + Gate Leakage = 3.91915e-06 W + Runtime Dynamic = 0.196833 W + + Load Store Unit: + Area = 0.108159 mm^2 + Peak Dynamic = 0.0709358 W + Subthreshold Leakage = 0.00903484 W + Subthreshold Leakage with power gating = 0.00428255 W + Gate Leakage = 0.000106881 W + Runtime Dynamic = 0.708347 W + + Data Cache: + Area = 0.0717513 mm^2 + Peak Dynamic 
= 0.0458425 W + Subthreshold Leakage = 0.00598917 W + Subthreshold Leakage with power gating = 0.00291199 W + Gate Leakage = 4.9393e-05 W + Runtime Dynamic = 0.0438993 W + + Load/Store Queue: + Area = 0.018086 mm^2 + Peak Dynamic = 0.0133541 W + Subthreshold Leakage = 0.00108498 W + Subthreshold Leakage with power gating = 0.00048824 W + Gate Leakage = 1.02625e-05 W + Runtime Dynamic = 0.213666 W + + Memory Management Unit: + Area = 0.0335457 mm^2 + Peak Dynamic = 0.036723 W + Subthreshold Leakage = 0.00356461 W + Subthreshold Leakage with power gating = 0.00160408 W + Runtime Dynamic = 1.15646 W + + Itlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Dtlb: + Area = 0.0121036 mm^2 + Peak Dynamic = 0.00368759 W + Subthreshold Leakage = 0.000801958 W + Subthreshold Leakage with power gating = 0.000360881 W + Gate Leakage = 7.1585e-06 W + Runtime Dynamic = 0.0147508 W + + Execution Unit: + Area = 0.26249 mm^2 + Peak Dynamic = 0.447085 W + Subthreshold Leakage = 0.0357461 W + Subthreshold Leakage with power gating = 0.0162617 W + Runtime Dynamic = 3.18096 W + + Register Files: + Area = 0.0242216 mm^2 + Peak Dynamic = 0.0313991 W + Subthreshold Leakage = 0.00159238 W + Subthreshold Leakage with power gating = 0.00089251 W + Gate Leakage = 8.85416e-06 W + Runtime Dynamic = 0.101531 W + + Integer RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.0223217 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.0990301 W + + Floating Point RF: + Area = 0.00663002 mm^2 + Peak Dynamic = 0.00907735 W + Subthreshold Leakage = 0.000136857 W + Subthreshold Leakage with power gating = 6.88085e-05 W + Gate Leakage = 1.03349e-06 W + Runtime Dynamic = 0.00248242 W + + Register Windows: + Area = 0.0109615 mm^2 + Peak Dynamic = 
0 W + Subthreshold Leakage = 0.00131867 W + Subthreshold Leakage with power gating = 0.000754893 W + Gate Leakage = 6.78717e-06 W + Runtime Dynamic = 1.84068e-05 W + + Instruction Scheduler: + Area = 0.00295995 mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Instruction Window: + Area = 0.00295995 mm^2 + Peak Dynamic = 0.010755 W + Subthreshold Leakage = 0.000179818 W + Subthreshold Leakage with power gating = 8.0918e-05 W + Gate Leakage = 1.86726e-06 W + Runtime Dynamic = 0.0878892 W + + Integer ALUs (Count: 1 ): + Area = 0.0384544 mm^2 + Peak Dynamic = 0.0946992 W + Subthreshold Leakage = 0.00667865 W + Subthreshold Leakage with power gating = 0.00300539 W + Gate Leakage = 6.39207e-05 W + Runtime Dynamic = 0.841771 W + + Floating Point Units (FPUs) (Count: 0.125 ): + Area = 0.0695899 mm^2 + Peak Dynamic = 0.0157832 W + Subthreshold Leakage = 0.00302155 W + Subthreshold Leakage with power gating = 0.0013597 W + Gate Leakage = 2.89189e-05 W + Runtime Dynamic = 0.0315664 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.115363 mm^2 + Peak Dynamic = 0.105221 W + Subthreshold Leakage = 0.020036 W + Subthreshold Leakage with power gating = 0.00901618 W + Gate Leakage = 0.000191762 W + Runtime Dynamic = 0.210443 W + + Results Broadcast Bus: + Area Overhead = 0.00256269 mm^2 + Peak Dynamic = 0.146966 W + Subthreshold Leakage = 0.00227703 W + Subthreshold Leakage with power gating = 0.00102466 W + Gate Leakage = 2.17932e-05 W + Runtime Dynamic = 0.28494 W + +***************************************************************************************** +L2 + Area = 2.53827 mm^2 + Peak Dynamic = 0.349383 W + Subthreshold Leakage = 0.153228 W + Subthreshold Leakage with power gating = 0.0843563 W + Gate Leakage = 0.00156597 W + Runtime Dynamic = 0.691017 W + 
+***************************************************************************************** +Second Level Directory + Area = 49.0405 mm^2 + Peak Dynamic = 23.479 W + Subthreshold Leakage = 3.1317 W + Subthreshold Leakage with power gating = 1.40926 W + Gate Leakage = 0.034322 W + Runtime Dynamic = 69.8371 W + +***************************************************************************************** +NOC + Area = 53.6467 mm^2 + Peak Dynamic = 23.5058 W + Subthreshold Leakage = 9.63744 W + Subthreshold Leakage with power gating = 4.27934 W + Gate Leakage = 0.12087 W + Runtime Dynamic = 2.88904 W + + Router: + Area = 0.602135 mm^2 + Peak Dynamic = 0.231369 W + Subthreshold Leakage = 0.127816 W + Subthreshold Leakage with power gating = 0.0566188 W + Gate Leakage = 0.00166364 W + Runtime Dynamic = 1.66586 W + + Virtual Channel Buffer: + Area = 0.165683 mm^2 + Peak Dynamic = 0.0507616 W + Subthreshold Leakage = 0.00249596 W + Subthreshold Leakage with power gating = 0.000224636 W + Gate Leakage = 2.45875e-05 W + Runtime Dynamic = 0.365484 W + + Crossbar: + Area = 0.160976 mm^2 + Peak Dynamic = 0.179891 W + Subthreshold Leakage = 0.12532 W + Subthreshold Leakage with power gating = 0.056394 W + Gate Leakage = 0.00163905 W + Runtime Dynamic = 1.29522 W + + Arbiter: + Peak Dynamic = 0.000716052 W + Subthreshold Leakage = 3.67148e-07 W + Subthreshold Leakage with power gating = 1.65217e-07 W + Gate Leakage = 3.86991e-09 W + Runtime Dynamic = 0.00515558 W + + Per Router Links: + Area = 0.236095 mm^2 + Peak Dynamic = 0.135909 W + Subthreshold Leakage = 0.0227687 W + Subthreshold Leakage with power gating = 0.0102459 W + Gate Leakage = 0.000224949 W + Runtime Dynamic = 1.22318 W + +***************************************************************************************** diff --git a/unit_test/golden/T2.golden b/unit_test/golden/T2.golden new file mode 100644 index 0000000..fe89613 --- /dev/null +++ b/unit_test/golden/T2.golden @@ -0,0 +1,361 @@ +McPAT (version 1.3 of Feb, 2015) 
is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 65 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 1400 + +***************************************************************************************** +Processor: + Area = 262.515 mm^2 + Peak Power = 72.9693 W + Total Leakage = 17.8605 W + Peak Dynamic = 55.1088 W + Subthreshold Leakage = 14.5272 W + Subthreshold Leakage with power gating = 6.84384 W + Gate Leakage = 3.33333 W + Runtime Dynamic = 39.5694 W + + Total Cores: 8 cores + Device Type= ITRS high performance device type + Area = 109.896 mm^2 + Peak Dynamic = 34.0976 W + Subthreshold Leakage = 8.7111 W + Subthreshold Leakage with power gating = 3.9493 W + Gate Leakage = 1.8738 W + Runtime Dynamic = 18.5631 W + + Total L2s: + Device Type= ITRS high performance device type + Area = 79.3999 mm^2 + Peak Dynamic = 8.1374 W + Subthreshold Leakage = 2.77828 W + Subthreshold Leakage with power gating = 1.53307 W + Gate Leakage = 0.562366 W + Runtime Dynamic = 4.50099 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 8.59141 mm^2 + Peak Dynamic = 2.03499 W + Subthreshold Leakage = 0.265204 W + Subthreshold Leakage with power gating = 0.119342 W + Gate Leakage = 0.105242 W + Runtime Dynamic = 4.57546 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 10.0461 mm^2 + Peak Dynamic = 1.18259 W + Subthreshold Leakage = 1.62858 W + Subthreshold Leakage with power gating = 0.726753 W + Gate Leakage = 0.391672 W + Runtime Dynamic = 1.18259 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 32.4783 mm^2 + Peak Dynamic = 6.28473 W + Subthreshold Leakage = 0.572318 W + Subthreshold Leakage with 
power gating = 0.258101 W + Gate Leakage = 0.106637 W + Runtime Dynamic = 8.38713 W + + Total NIUs: 2 Network Interface Units + Device Type= ITRS high performance device type + Area = 15.8633 mm^2 + Peak Dynamic = 1.86482 W + Subthreshold Leakage = 0.357626 W + Subthreshold Leakage with power gating = 0.160932 W + Gate Leakage = 0.183662 W + Runtime Dynamic = 1.30537 W + + Total PCIes: 1 PCIe Controllers + Device Type= ITRS high performance device type + Area = 6.24 mm^2 + Peak Dynamic = 1.5067 W + Subthreshold Leakage = 0.214091 W + Subthreshold Leakage with power gating = 0.0963408 W + Gate Leakage = 0.109948 W + Runtime Dynamic = 1.05469 W + +***************************************************************************************** +Core: + Area = 13.737 mm^2 + Peak Dynamic = 4.26219 W + Subthreshold Leakage = 1.08889 W + Subthreshold Leakage with power gating = 0.493663 W + Gate Leakage = 0.234225 W + Runtime Dynamic = 18.5631 W + + Instruction Fetch Unit: + Area = 3.42552 mm^2 + Peak Dynamic = 1.32265 W + Subthreshold Leakage = 0.139453 W + Subthreshold Leakage with power gating = 0.064488 W + Gate Leakage = 0.0386298 W + Runtime Dynamic = 4.4662 W + + Instruction Cache: + Area = 3.18687 mm^2 + Peak Dynamic = 1.01844 W + Subthreshold Leakage = 0.110561 W + Subthreshold Leakage with power gating = 0.0514384 W + Gate Leakage = 0.0265863 W + Runtime Dynamic = 2.05763 W + + Instruction Buffer: + Area = 0.00934836 mm^2 + Peak Dynamic = 0.0100878 W + Subthreshold Leakage = 0.00043592 W + Subthreshold Leakage with power gating = 0.000244228 W + Gate Leakage = 7.39974e-05 W + Runtime Dynamic = 0.161405 W + + Instruction Decoder: + Area = 0.0119193 mm^2 + Peak Dynamic = 0.0892213 W + Subthreshold Leakage = 0.00298091 W + Subthreshold Leakage with power gating = 0.00134141 W + Gate Leakage = 0.000408973 W + Runtime Dynamic = 1.42754 W + + Load Store Unit: + Area = 1.59036 mm^2 + Peak Dynamic = 0.326405 W + Subthreshold Leakage = 0.0717021 W + Subthreshold Leakage with 
power gating = 0.0333226 W + Gate Leakage = 0.0208935 W + Runtime Dynamic = 3.63948 W + + Data Cache: + Area = 0.651877 mm^2 + Peak Dynamic = 0.129096 W + Subthreshold Leakage = 0.0272598 W + Subthreshold Leakage with power gating = 0.0133236 W + Gate Leakage = 0.00420207 W + Runtime Dynamic = 0.194238 W + + Load/Store Queue: + Area = 0.596287 mm^2 + Peak Dynamic = 0.0948573 W + Subthreshold Leakage = 0.0189667 W + Subthreshold Leakage with power gating = 0.00853499 W + Gate Leakage = 0.00513085 W + Runtime Dynamic = 3.03543 W + + Memory Management Unit: + Area = 1.18028 mm^2 + Peak Dynamic = 0.371691 W + Subthreshold Leakage = 0.0622634 W + Subthreshold Leakage with power gating = 0.0280185 W + Runtime Dynamic = 2.29542 W + + Itlb: + Area = 0.323684 mm^2 + Peak Dynamic = 0.0433015 W + Subthreshold Leakage = 0.0103749 W + Subthreshold Leakage with power gating = 0.0046687 W + Gate Leakage = 0.00276004 W + Runtime Dynamic = 0.692826 W + + Dtlb: + Area = 0.639217 mm^2 + Peak Dynamic = 0.0722582 W + Subthreshold Leakage = 0.0264128 W + Subthreshold Leakage with power gating = 0.0118858 W + Gate Leakage = 0.00693162 W + Runtime Dynamic = 0.578072 W + + Execution Unit: + Area = 5.7838 mm^2 + Peak Dynamic = 2.24145 W + Subthreshold Leakage = 0.525623 W + Subthreshold Leakage with power gating = 0.237404 W + Runtime Dynamic = 8.16202 W + + Register Files: + Area = 0.404016 mm^2 + Peak Dynamic = 0.114143 W + Subthreshold Leakage = 0.00767269 W + Subthreshold Leakage with power gating = 0.00432572 W + Gate Leakage = 0.000807777 W + Runtime Dynamic = 0.581871 W + + Integer RF: + Area = 0.128201 mm^2 + Peak Dynamic = 0.081017 W + Subthreshold Leakage = 0.000627822 W + Subthreshold Leakage with power gating = 0.000325594 W + Gate Leakage = 9.15964e-05 W + Runtime Dynamic = 0.567612 W + + Floating Point RF: + Area = 0.0641005 mm^2 + Peak Dynamic = 0.0331264 W + Subthreshold Leakage = 0.000627822 W + Subthreshold Leakage with power gating = 0.000325594 W + Gate Leakage = 
9.15964e-05 W + Runtime Dynamic = 0.0142097 W + + Register Windows: + Area = 0.211715 mm^2 + Peak Dynamic = 0 W + Subthreshold Leakage = 0.00641705 W + Subthreshold Leakage with power gating = 0.00367453 W + Gate Leakage = 0.000624585 W + Runtime Dynamic = 4.9895e-05 W + + Instruction Scheduler: + Area = 0.0508464 mm^2 + Peak Dynamic = 0.063313 W + Subthreshold Leakage = 0.000821652 W + Subthreshold Leakage with power gating = 0.000369744 W + Gate Leakage = 0.000161723 W + Runtime Dynamic = 0.519388 W + + Instruction Window: + Area = 0.0508464 mm^2 + Peak Dynamic = 0.063313 W + Subthreshold Leakage = 0.000821652 W + Subthreshold Leakage with power gating = 0.000369744 W + Gate Leakage = 0.000161723 W + Runtime Dynamic = 0.519388 W + + Integer ALUs (Count: 2 ): + Area = 0.224224 mm^2 + Peak Dynamic = 0.425547 W + Subthreshold Leakage = 0.0739774 W + Subthreshold Leakage with power gating = 0.0332898 W + Gate Leakage = 0.0133396 W + Runtime Dynamic = 3.78264 W + + Floating Point Units (FPUs) (Count: 1 ): + Area = 4.85979 mm^2 + Peak Dynamic = 0.425547 W + Subthreshold Leakage = 0.400843 W + Subthreshold Leakage with power gating = 0.18038 W + Gate Leakage = 0.07228 W + Runtime Dynamic = 0.0709246 W + + Results Broadcast Bus: + Area Overhead = 0.0275346 mm^2 + Peak Dynamic = 0.844065 W + Subthreshold Leakage = 0.0168326 W + Subthreshold Leakage with power gating = 0.00757466 W + Gate Leakage = 0.00303525 W + Runtime Dynamic = 1.73188 W + +***************************************************************************************** +L2 + Area = 9.92498 mm^2 + Peak Dynamic = 1.01718 W + Subthreshold Leakage = 0.347285 W + Subthreshold Leakage with power gating = 0.191634 W + Gate Leakage = 0.0702958 W + Runtime Dynamic = 4.50099 W + +***************************************************************************************** +First Level Directory + Area = 1.07393 mm^2 + Peak Dynamic = 0.254374 W + Subthreshold Leakage = 0.0331505 W + Subthreshold Leakage with power gating = 
0.0149177 W + Gate Leakage = 0.0131553 W + Runtime Dynamic = 4.57546 W + +***************************************************************************************** +Memory Controller: + Area = 8.11957 mm^2 + Peak Dynamic = 1.57118 W + Subthreshold Leakage = 0.143079 W + Subthreshold Leakage with power gating = 0.0645254 W + Gate Leakage = 0.0266592 W + Runtime Dynamic = 8.38713 W + + Front End Engine: + Area = 0.300607 mm^2 + Peak Dynamic = 0.148745 W + Subthreshold Leakage = 0.00621973 W + Subthreshold Leakage with power gating = 0.00293846 W + Gate Leakage = 0.00107498 W + Runtime Dynamic = 0.753634 W + + Transaction Engine: + Area = 2.66058 mm^2 + Peak Dynamic = 0.6912 W + Subthreshold Leakage = 0.0465697 W + Subthreshold Leakage with power gating = 0.0209564 W + Gate Leakage = 0.00870562 W + Runtime Dynamic = 3.50205 W + + PHY: + Area = 5.15838 mm^2 + Peak Dynamic = 0.731237 W + Subthreshold Leakage = 0.0902901 W + Subthreshold Leakage with power gating = 0.0406305 W + Gate Leakage = 0.0168786 W + Runtime Dynamic = 4.13145 W + +***************************************************************************************** +NIU: + Area = 7.93167 mm^2 + Peak Dynamic = 0.93241 W + Subthreshold Leakage = 0.178813 W + Subthreshold Leakage with power gating = 0.080466 W + Gate Leakage = 0.0918312 W + Runtime Dynamic = 0.652687 W + +***************************************************************************************** +PCIe: + Area = 6.24 mm^2 + Peak Dynamic = 1.5067 W + Subthreshold Leakage = 0.214091 W + Subthreshold Leakage with power gating = 0.0963408 W + Gate Leakage = 0.109948 W + Runtime Dynamic = 1.05469 W + +***************************************************************************************** +NOC + Area = 10.0461 mm^2 + Peak Dynamic = 1.18259 W + Subthreshold Leakage = 1.62858 W + Subthreshold Leakage with power gating = 0.726753 W + Gate Leakage = 0.391672 W + Runtime Dynamic = 1.18259 W + + Router: + Area = 5.02306 mm^2 + Peak Dynamic = 0.591293 W + 
Subthreshold Leakage = 0.814291 W + Subthreshold Leakage with power gating = 0.363377 W + Gate Leakage = 0.195836 W + Runtime Dynamic = 1.18259 W + + Virtual Channel Buffer: + Area = 0.902625 mm^2 + Peak Dynamic = 0.0748734 W + Subthreshold Leakage = 0.00763621 W + Subthreshold Leakage with power gating = 0.00038181 W + Gate Leakage = 0.00125194 W + Runtime Dynamic = 0.149747 W + + Crossbar: + Area = 1.69589 mm^2 + Peak Dynamic = 0.511174 W + Subthreshold Leakage = 0.806641 W + Subthreshold Leakage with power gating = 0.362988 W + Gate Leakage = 0.194581 W + Runtime Dynamic = 1.02235 W + + Arbiter: + Peak Dynamic = 0.00524523 W + Subthreshold Leakage = 1.42757e-05 W + Subthreshold Leakage with power gating = 6.42405e-06 W + Gate Leakage = 2.78294e-06 W + Runtime Dynamic = 0.0104905 W + +***************************************************************************************** diff --git a/unit_test/golden/Xeon.golden b/unit_test/golden/Xeon.golden new file mode 100644 index 0000000..6495e90 --- /dev/null +++ b/unit_test/golden/Xeon.golden @@ -0,0 +1,387 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 65 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 3400 + +***************************************************************************************** +Processor: + Area = 410.507 mm^2 + Peak Power = 134.938 W + Total Leakage = 36.8319 W + Peak Dynamic = 98.1063 W + Subthreshold Leakage = 35.1632 W + Subthreshold Leakage with power gating = 16.3977 W + Gate Leakage = 1.66871 W + Runtime Dynamic = 72.9199 W + + Total Cores: 2 cores + Device Type= ITRS high performance device type + Area = 111.713 mm^2 + Peak Dynamic = 78.5978 W + Subthreshold Leakage = 24.1131 W + Subthreshold Leakage with power gating = 10.3006 W + Gate Leakage = 1.49026 W + Runtime Dynamic = 55.7891 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 293.281 mm^2 + Peak Dynamic = 6.70159 W + Subthreshold Leakage = 10.9824 W + Subthreshold Leakage with power gating = 6.06659 W + Gate Leakage = 0.165767 W + Runtime Dynamic = 4.32382 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 5.51364 mm^2 + Peak Dynamic = 12.807 W + Subthreshold Leakage = 0.0678232 W + Subthreshold Leakage with power gating = 0.0305204 W + Gate Leakage = 0.0126787 W + Runtime Dynamic = 12.807 W + +***************************************************************************************** +Core: + Area = 55.8565 mm^2 + Peak Dynamic = 39.2989 W + Subthreshold Leakage = 12.0565 W + Subthreshold Leakage with power gating = 5.15028 W + Gate Leakage = 0.74513 W + Runtime Dynamic = 55.7891 W + + Instruction Fetch Unit: + Area = 8.46345 mm^2 + Peak Dynamic = 5.64559 W + Subthreshold Leakage = 1.25249 W + Subthreshold Leakage with power gating = 0.550703 W + Gate Leakage = 0.0774849 W + Runtime Dynamic 
= 8.36273 W + + Instruction Cache: + Area = 2.76773 mm^2 + Peak Dynamic = 1.22225 W + Subthreshold Leakage = 0.392547 W + Subthreshold Leakage with power gating = 0.193679 W + Gate Leakage = 0.0188075 W + Runtime Dynamic = 3.62069 W + + Branch Target Buffer: + Area = 0.946863 mm^2 + Peak Dynamic = 0.143479 W + Subthreshold Leakage = 0.053185 W + Subthreshold Leakage with power gating = 0.0286454 W + Gate Leakage = 0.00212441 W + Runtime Dynamic = 0.573914 W + + Branch Predictor: + Area = 0.561919 mm^2 + Peak Dynamic = 0.199098 W + Subthreshold Leakage = 0.0622256 W + Subthreshold Leakage with power gating = 0.0331778 W + Gate Leakage = 0.00281067 W + Runtime Dynamic = 0.190781 W + + Global Predictor: + Area = 0.184006 mm^2 + Peak Dynamic = 0.0632454 W + Subthreshold Leakage = 0.0232971 W + Subthreshold Leakage with power gating = 0.0123351 W + Gate Leakage = 0.000995882 W + Runtime Dynamic = 0.0677377 W + + Local Predictor: + L1_Local Predictor: + Area = 0.0840116 mm^2 + Peak Dynamic = 0.0290083 W + Subthreshold Leakage = 0.00758565 W + Subthreshold Leakage with power gating = 0.00416851 W + Gate Leakage = 0.000375864 W + Runtime Dynamic = 0.0331012 W + + L2_Local Predictor: + Area = 0.0650847 mm^2 + Peak Dynamic = 0.0194608 W + Subthreshold Leakage = 0.00520295 W + Subthreshold Leakage with power gating = 0.00283682 W + Gate Leakage = 0.000275466 W + Runtime Dynamic = 0.0222015 W + + Chooser: + Area = 0.184006 mm^2 + Peak Dynamic = 0.0632454 W + Subthreshold Leakage = 0.0232971 W + Subthreshold Leakage with power gating = 0.0123351 W + Gate Leakage = 0.000995882 W + Runtime Dynamic = 0.0677377 W + + RAS: + Area = 0.0448097 mm^2 + Peak Dynamic = 0.0241382 W + Subthreshold Leakage = 0.0028428 W + Subthreshold Leakage with power gating = 0.00150227 W + Gate Leakage = 0.00016758 W + Runtime Dynamic = 3.11494e-06 W + + Instruction Buffer: + Area = 0.0555667 mm^2 + Peak Dynamic = 1.44022 W + Subthreshold Leakage = 0.00404782 W + Subthreshold Leakage with power gating = 
0.00196928 W + Gate Leakage = 0.000202397 W + Runtime Dynamic = 0.960148 W + + Instruction Decoder: + Area = 3.87654 mm^2 + Peak Dynamic = 2.26389 W + Subthreshold Leakage = 0.659852 W + Subthreshold Leakage with power gating = 0.261301 W + Gate Leakage = 0.0381395 W + Runtime Dynamic = 2.26389 W + + Renaming Unit: + Area = 0.583856 mm^2 + Peak Dynamic = 3.32704 W + Subthreshold Leakage = 0.0870274 W + Subthreshold Leakage with power gating = 0.035326 W + Gate Leakage = 0.0158556 W + Runtime Dynamic = 4.61311 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0550773 mm^2 + Peak Dynamic = 1.00881 W + Subthreshold Leakage = 0.00072753 W + Subthreshold Leakage with power gating = 0.000384163 W + Gate Leakage = 5.33898e-05 W + Runtime Dynamic = 1.00127 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.0262203 mm^2 + Peak Dynamic = 0.484298 W + Subthreshold Leakage = 0.000517404 W + Subthreshold Leakage with power gating = 0.000267209 W + Gate Leakage = 5.24626e-05 W + Runtime Dynamic = 0.240316 W + + Free List: + Area = 0.134958 mm^2 + Peak Dynamic = 0.138612 W + Subthreshold Leakage = 0.00229689 W + Subthreshold Leakage with power gating = 0.00124314 W + Gate Leakage = 0.00013679 W + Runtime Dynamic = 0.249121 W + + Int Retire RAT: + Area = 0.0326341 mm^2 + Peak Dynamic = 0.0531434 W + Subthreshold Leakage = 0.00053943 W + Subthreshold Leakage with power gating = 0.000282205 W + Gate Leakage = 5.41145e-05 W + Runtime Dynamic = 0.0492425 W + + FP Retire RAT: + Area = 0.0139279 mm^2 + Peak Dynamic = 0.0274512 W + Subthreshold Leakage = 0.000578323 W + Subthreshold Leakage with power gating = 0.000302886 W + Gate Leakage = 4.95235e-05 W + Runtime Dynamic = 0.0114048 W + + FP Free List: + Area = 0.0662033 mm^2 + Peak Dynamic = 0.108106 W + Subthreshold Leakage = 0.00173675 W + Subthreshold Leakage with power gating = 0.000916449 W + Gate Leakage = 0.0001089 W + Runtime Dynamic = 0.0485337 W + + Load Store Unit: + Area = 4.40911 mm^2 + Peak 
Dynamic = 3.92645 W + Subthreshold Leakage = 0.444866 W + Subthreshold Leakage with power gating = 0.183444 W + Gate Leakage = 0.0406801 W + Runtime Dynamic = 5.41973 W + + Data Cache: + Area = 2.44703 mm^2 + Peak Dynamic = 2.5367 W + Subthreshold Leakage = 0.264874 W + Subthreshold Leakage with power gating = 0.112167 W + Gate Leakage = 0.0180265 W + Runtime Dynamic = 3.09654 W + + LoadQ: + Area = 0.622818 mm^2 + Peak Dynamic = 0.456307 W + Subthreshold Leakage = 0.0459168 W + Subthreshold Leakage with power gating = 0.0181831 W + Gate Leakage = 0.00341772 W + Runtime Dynamic = 0.456307 W + + StoreQ: + Area = 0.706764 mm^2 + Peak Dynamic = 0.556789 W + Subthreshold Leakage = 0.0534447 W + Subthreshold Leakage with power gating = 0.0211641 W + Gate Leakage = 0.00383548 W + Runtime Dynamic = 1.11358 W + + Memory Management Unit: + Area = 0.791341 mm^2 + Peak Dynamic = 1.1714 W + Subthreshold Leakage = 0.126644 W + Subthreshold Leakage with power gating = 0.0501508 W + Runtime Dynamic = 3.72143 W + + Itlb: + Area = 0.132796 mm^2 + Peak Dynamic = 0.0637919 W + Subthreshold Leakage = 0.0152786 W + Subthreshold Leakage with power gating = 0.00605034 W + Gate Leakage = 0.00104433 W + Runtime Dynamic = 0.51034 W + + Dtlb: + Area = 0.403709 mm^2 + Peak Dynamic = 0.165977 W + Subthreshold Leakage = 0.0307338 W + Subthreshold Leakage with power gating = 0.0121706 W + Gate Leakage = 0.00214587 W + Runtime Dynamic = 1.32782 W + + Execution Unit: + Area = 16.5331 mm^2 + Peak Dynamic = 22.0628 W + Subthreshold Leakage = 3.371 W + Subthreshold Leakage with power gating = 1.34466 W + Runtime Dynamic = 26.4414 W + + Register Files: + Area = 4.1346 mm^2 + Peak Dynamic = 5.62196 W + Subthreshold Leakage = 0.0611201 W + Subthreshold Leakage with power gating = 0.0319871 W + Gate Leakage = 0.00345478 W + Runtime Dynamic = 2.69803 W + + Integer RF: + Area = 2.77222 mm^2 + Peak Dynamic = 4.72439 W + Subthreshold Leakage = 0.0373563 W + Subthreshold Leakage with power gating = 0.0193618 W 
+ Gate Leakage = 0.00215324 W + Runtime Dynamic = 2.40781 W + + Floating Point RF: + Area = 1.36237 mm^2 + Peak Dynamic = 0.897577 W + Subthreshold Leakage = 0.0237638 W + Subthreshold Leakage with power gating = 0.0126253 W + Gate Leakage = 0.00130154 W + Runtime Dynamic = 0.290229 W + + Instruction Scheduler: + Area = 1.3266 mm^2 + Peak Dynamic = 3.25577 W + Subthreshold Leakage = 0.0394024 W + Subthreshold Leakage with power gating = 0.0175674 W + Gate Leakage = 0.00270753 W + Runtime Dynamic = 3.744 W + + Instruction Window: + Area = 0.295422 mm^2 + Peak Dynamic = 1.01014 W + Subthreshold Leakage = 0.0135008 W + Subthreshold Leakage with power gating = 0.00534633 W + Gate Leakage = 0.00105709 W + Runtime Dynamic = 1.31235 W + + FP Instruction Window: + Area = 0.138043 mm^2 + Peak Dynamic = 0.630531 W + Subthreshold Leakage = 0.0102223 W + Subthreshold Leakage with power gating = 0.00404803 W + Gate Leakage = 0.000801924 W + Runtime Dynamic = 0.816561 W + + ROB: + Area = 0.893137 mm^2 + Peak Dynamic = 1.6151 W + Subthreshold Leakage = 0.0156793 W + Subthreshold Leakage with power gating = 0.00817299 W + Gate Leakage = 0.000848517 W + Runtime Dynamic = 1.6151 W + + Integer ALUs (Count: 6 ): + Area = 0.672672 mm^2 + Peak Dynamic = 4.55818 W + Subthreshold Leakage = 0.599188 W + Subthreshold Leakage with power gating = 0.237278 W + Gate Leakage = 0.0454759 W + Runtime Dynamic = 2.33394 W + + Floating Point Units (FPUs) (Count: 2 ): + Area = 9.71959 mm^2 + Peak Dynamic = 1.43327 W + Subthreshold Leakage = 2.16445 W + Subthreshold Leakage with power gating = 0.857121 W + Gate Leakage = 0.164273 W + Runtime Dynamic = 2.55333 W + + Complex ALUs (Mul/Div) (Count: 1 ): + Area = 0.336336 mm^2 + Peak Dynamic = 0.510666 W + Subthreshold Leakage = 0.299594 W + Subthreshold Leakage with power gating = 0.118639 W + Gate Leakage = 0.0227379 W + Runtime Dynamic = 3.18505 W + + Results Broadcast Bus: + Area Overhead = 0.0884938 mm^2 + Peak Dynamic = 5.17636 W + Subthreshold 
Leakage = 0.126618 W + Subthreshold Leakage with power gating = 0.0501405 W + Gate Leakage = 0.00960975 W + Runtime Dynamic = 8.91383 W + + L2 + Area = 16.0033 mm^2 + Peak Dynamic = 3.16559 W + Subthreshold Leakage = 2.73387 W + Subthreshold Leakage with power gating = 1.3859 W + Gate Leakage = 0.0221925 W + Runtime Dynamic = 7.23071 W + +***************************************************************************************** + L3 + Area = 293.281 mm^2 + Peak Dynamic = 6.70159 W + Subthreshold Leakage = 10.9824 W + Subthreshold Leakage with power gating = 6.06659 W + Gate Leakage = 0.165767 W + Runtime Dynamic = 4.32382 W + +***************************************************************************************** +BUSES + Area = 5.51364 mm^2 + Peak Dynamic = 12.807 W + Subthreshold Leakage = 0.0678232 W + Subthreshold Leakage with power gating = 0.0305204 W + Gate Leakage = 0.0126787 W + Runtime Dynamic = 12.807 W + + Bus: + Area = 5.51364 mm^2 + Peak Dynamic = 12.807 W + Subthreshold Leakage = 0.0678232 W + Subthreshold Leakage with power gating = 0.0305204 W + Gate Leakage = 0.0126787 W + Runtime Dynamic = 12.807 W + +***************************************************************************************** diff --git a/unit_test/input/ARM_A9_2GHz.xml b/unit_test/input/ARM_A9_2GHz.xml new file mode 100644 index 0000000..b97ddd9 --- /dev/null +++ b/unit_test/input/ARM_A9_2GHz.xml @@ -0,0 +1,442 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/ARM_A9_2GHz_withIOC.xml b/unit_test/input/ARM_A9_2GHz_withIOC.xml new file mode 100644 index 0000000..e8099b4 --- /dev/null +++ b/unit_test/input/ARM_A9_2GHz_withIOC.xml @@ -0,0 +1,442 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Alpha21364.xml b/unit_test/input/Alpha21364.xml new file mode 100644 index 0000000..78d91fa --- /dev/null +++ b/unit_test/input/Alpha21364.xml @@ -0,0 +1,436 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Niagara1.xml b/unit_test/input/Niagara1.xml new file mode 100644 index 0000000..ded4dd2 --- /dev/null +++ b/unit_test/input/Niagara1.xml @@ -0,0 +1,429 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Niagara1_sharing_DC.xml b/unit_test/input/Niagara1_sharing_DC.xml new file mode 100644 index 0000000..fa068d5 --- /dev/null +++ b/unit_test/input/Niagara1_sharing_DC.xml @@ -0,0 +1,440 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Niagara1_sharing_SBT.xml b/unit_test/input/Niagara1_sharing_SBT.xml new file mode 100644 index 0000000..9135b0b --- /dev/null +++ b/unit_test/input/Niagara1_sharing_SBT.xml @@ -0,0 +1,441 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Niagara1_sharing_ST.xml b/unit_test/input/Niagara1_sharing_ST.xml new file mode 100644 index 0000000..e5f2d2a --- /dev/null +++ b/unit_test/input/Niagara1_sharing_ST.xml @@ -0,0 +1,432 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Niagara2.xml b/unit_test/input/Niagara2.xml new file mode 100644 index 0000000..c6c152f --- /dev/null +++ b/unit_test/input/Niagara2.xml @@ -0,0 +1,427 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/Penryn.xml b/unit_test/input/Penryn.xml new file mode 100644 index 0000000..d7b858f --- /dev/null +++ b/unit_test/input/Penryn.xml @@ -0,0 +1,446 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + diff --git a/unit_test/input/Xeon.xml b/unit_test/input/Xeon.xml new file mode 100644 index 0000000..7a16b62 --- /dev/null +++ b/unit_test/input/Xeon.xml @@ -0,0 +1,454 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py new file mode 100755 index 0000000..16059d3 --- /dev/null +++ b/unit_test/unit_test.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2020 Andrew Smith +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +import os +import subprocess +import argparse +import datetime +import shutil +import re +import difflib +import glob +from threading import Timer + +start = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +verbose = True +debug = True +quiet = False +timeout_limit = 120.0 +kill_flag = False + +input_path = "./input" +output_path = "./output" +golden_path = "./golden" + + +def print_info(info, *args): + if verbose: + print("[ " + __file__ + " ] " + info + " " + + " ".join([str(x) for x in args])) + elif not quiet: + print("[ " + __file__ + " ] " + info) + + +def print_pass(vector, *args): + if verbose: + print("\033[32m[ " + __file__ + " ] PASS:\033[00m " + vector + " " + + " ".join([str(x) for x in args])) + elif not quiet: + print("\033[32m[ " + __file__ + " ] PASS:\033[00m " + vector) + + +def print_fail(vector, *args): + if verbose: + print("\033[31m[ " + __file__ + " ] FAIL:\033[00m " + vector + " " + + " ".join([str(x) for x in args])) + elif not quiet: + print("\033[31m[ " + __file__ + " ] FAIL:\033[00m " + vector) + + +def print_results(passed, failed, total): + if not quiet: + print("[ " + __file__ + " ] Passed: \033[32m" + str(passed) + + "\033[00m; Failed: \033[31m" + str(failed) + + "\033[00m; Total Vectors: " + str(total)) + + +def kill(p): + global kill_flag + kill_flag = True + try: + p.kill() + except OSError: + pass + + +def diff_result(vector):  # 0 = matches golden, 1 = differs + outfile = os.path.join(output_path, vector + ".out") + difffile = os.path.join(output_path, vector + 
".diff") + goldfile = os.path.join(golden_path, vector + ".golden") + with open(outfile, "r") as o, open(goldfile, "r") as g: + outlines = o.readlines() + goldlines = g.readlines() + result = list(difflib.unified_diff(outlines, goldlines)) + with open(difffile, "w") as d: + d.writelines(result) + if len(result) == 0: + return 0 + return 1 + + +def run_test(vector):  # 0 = pass, 1 = timeout or mismatch + global kill_flag + kill_flag = False + infile = os.path.join(input_path, vector + ".xml") + stdo = os.path.join(output_path, vector + ".out") + stde = os.path.join(output_path, vector + ".err") + with open(stdo, "w") as so, open(stde, "w") as se: + p = subprocess.Popen([ + "../mcpat", "-infile", infile, "-print_level", "5", "-opt_for_clk", "1" + ], + stdout=so, + stderr=se) + t = Timer(timeout_limit, kill, [p]) + t.start() + p.wait() + t.cancel() + if kill_flag: + print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") + return 1 + else: + if diff_result(vector) == 0: + print_pass(vector) + return 0 + else: + print_fail( + vector, + "The files " + vector + ".out and " + vector + ".golden differ") + return 1 + return 0 + + +def get_vectors():  # sorted names of the *.xml inputs + files = glob.glob(os.path.join(input_path, "*.xml")) + vectors = sorted([os.path.splitext(os.path.basename(f))[0] for f in files]) + return vectors + + +if __name__ == "__main__": + p = 0 + f = 0 + print_info(start) + vectors = get_vectors() + print_info("Found " + str(len(vectors)) + " test vectors") + for vector in vectors: + if run_test(vector) == 0: + p += 1 + else: + f += 1 + print_results(p, f, len(vectors)) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh new file mode 100755 index 0000000..3dbee0b --- /dev/null +++ b/unit_test/unit_test.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# +# Copyright (c) 2020 Andrew Smith +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation 
the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +script_name="unit_test.sh" + +print_info () { + echo -e "[ $script_name ] $1" +} + +print_pass () { + green="\e[32m" + nc="\e[0m" + echo -e "$green[ $script_name ] PASS:$nc $1" +} + +print_error () { + red="\e[31m" + nc="\e[0m" + echo -e "$red[ $script_name ] ERROR:$nc $1" +} + +print_test_results () { + green="\e[32m" + red="\e[31m" + nc="\e[0m" + echo -e "[ $script_name ] Passed $green$1$nc; Failed $red$2$nc; out of $3 Unit Tests" +} + + +#-------------------------------------------------------------------- +# Output Directories +# ___ _ _ _____ ____ _ _ _____ ____ ___ ____ +# / _ \| | | |_ _| _ \| | | |_ _| | _ \_ _| _ \ +# | | | | | | | | | | |_) | | | | | | | | | | || |_) | +# | |_| | |_| | | | | __/| |_| | | | | |_| | || _ < +# \___/ \___/ |_| |_| \___/ |_| |____/___|_| \_\ +# +#-------------------------------------------------------------------- +OUTPUT="./output" +if [ ! 
-d $OUTPUT ]; then + print_info "Creating $OUTPUT" + mkdir -p $OUTPUT +else + print_info "Cleaning $OUTPUT" + rm -f $OUTPUT/* +fi + +GOLDEN="./golden" + +#-------------------------------------------------------------------- +# Run Tests +# _____ _____ ____ _____ ____ +# |_ _| ____/ ___|_ _/ ___| +# | | | _| \___ \ | | \___ \ +# | | | |___ ___) || | ___) | +# |_| |_____|____/ |_| |____/ +# +#-------------------------------------------------------------------- +INPUT="./input" +PASS_COUNT=0 +TOTAL_COUNT=0 +FAIL_COUNT=0 +for t in $(ls $INPUT); do + test_name=$(basename $t .xml) + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + ../mcpat -infile $INPUT/$test_name.xml -print_level 5 -opt_for_clk 1 > $OUTPUT/$test_name.out 2> $OUTPUT/$test_name.err + if [ -s $OUTPUT/$test_name.err ] || [ ! -s $OUTPUT/$test_name.out ]; + then + print_error "$test_name; check $OUTPUT/$test_name.err" + FAIL_COUNT=$((FAIL_COUNT + 1)) + else + if [ $(grep -rnI "nan\|inf" $OUTPUT/${test_name}.out | wc -l) -ne 0 ]; + then # nan/inf in the report is a failure + print_error "$test_name; nan, inf present in output; check $OUTPUT/$test_name.out" + FAIL_COUNT=$((FAIL_COUNT + 1)) + else + if [ $(diff $GOLDEN/$test_name.golden $OUTPUT/$test_name.out | wc -l) -eq 0 ]; + then + print_pass "$test_name" + PASS_COUNT=$((PASS_COUNT + 1)) + else + print_error "$test_name; output differs from golden output" + FAIL_COUNT=$((FAIL_COUNT + 1)) + fi + fi + fi +done +print_test_results $PASS_COUNT $FAIL_COUNT $TOTAL_COUNT From 02b8be9dd52804db3a8fa196beaea316dd5a368e Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 8 Jun 2020 19:04:34 -0500 Subject: [PATCH 02/59] git-hooks: Used clang-format to stylize the code base based on the LLVM Style Guides. 
--- .clang-format | 137 + XML_Parse.cc | 5559 ++++++++++++++------- XML_Parse.h | 1060 ++-- arch_const.h | 358 +- array.cc | 561 +-- array.h | 92 +- basic_components.cc | 170 +- basic_components.h | 357 +- cacti/Ucache.cc | 1465 +++--- cacti/Ucache.h | 111 +- cacti/arbiter.cc | 130 +- cacti/arbiter.h | 56 +- cacti/area.cc | 11 +- cacti/area.h | 26 +- cacti/bank.cc | 276 +- cacti/bank.h | 52 +- cacti/basic_circuit.cc | 1254 +++-- cacti/basic_circuit.h | 321 +- cacti/cacti_interface.cc | 232 +- cacti/cacti_interface.h | 712 ++- cacti/component.cc | 184 +- cacti/component.h | 55 +- cacti/const.h | 165 +- cacti/crossbar.cc | 171 +- cacti/crossbar.h | 64 +- cacti/decoder.cc | 1908 ++++---- cacti/decoder.h | 227 +- cacti/htree2.cc | 880 ++-- cacti/htree2.h | 98 +- cacti/io.cc | 5360 +++++++++++---------- cacti/io.h | 10 +- cacti/main.cc | 193 +- cacti/mat.cc | 3209 +++++++------ cacti/mat.h | 259 +- cacti/nuca.cc | 427 +- cacti/nuca.h | 98 +- cacti/parameter.cc | 1119 ++--- cacti/parameter.h | 300 +- cacti/powergating.cc | 191 +- cacti/powergating.h | 73 +- cacti/router.cc | 293 +- cacti/router.h | 131 +- cacti/subarray.cc | 290 +- cacti/subarray.h | 47 +- cacti/technology.cc | 4070 ++++++++-------- cacti/uca.cc | 591 +-- cacti/uca.h | 107 +- cacti/version_cacti.h | 11 +- cacti/wire.cc | 1063 ++-- cacti/wire.h | 160 +- core.cc | 9852 ++++++++++++++++++++++---------------- core.h | 408 +- globalvar.h | 7 +- interconnect.cc | 193 +- interconnect.h | 107 +- iocontrollers.cc | 943 ++-- iocontrollers.h | 71 +- logic.cc | 2101 ++++---- logic.h | 333 +- main.cc | 105 +- memoryctrl.cc | 1545 +++--- memoryctrl.h | 120 +- noc.cc | 813 ++-- noc.h | 69 +- processor.cc | 1765 ++++--- processor.h | 67 +- sharedcache.cc | 1960 ++++---- sharedcache.h | 82 +- util/format.sh | 7 + util/run-clang-tidy.py | 326 ++ version.h | 6 +- xmlParser.cc | 5516 +++++++++++---------- xmlParser.h | 1377 +++--- 73 files changed, 34245 insertions(+), 28222 deletions(-) create mode 100644 .clang-format 
mode change 100755 => 100644 cacti/bank.cc mode change 100755 => 100644 cacti/bank.h mode change 100755 => 100644 cacti/mat.cc mode change 100755 => 100644 cacti/mat.h mode change 100755 => 100644 cacti/subarray.cc mode change 100755 => 100644 cacti/subarray.h mode change 100755 => 100644 cacti/uca.cc mode change 100755 => 100644 cacti/uca.h create mode 100755 util/format.sh create mode 100755 util/run-clang-tidy.py diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..765a1ab --- /dev/null +++ b/.clang-format @@ -0,0 +1,137 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: Always + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true 
+BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + - Regex: '.*' + Priority: 1 + SortPriority: 0 +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentGotoLabels: true +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true 
+SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Latest +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 2 +UseCRLF: false +UseTab: Never +... + diff --git a/XML_Parse.cc b/XML_Parse.cc index d6df351..71796b4 100644 --- a/XML_Parse.cc +++ b/XML_Parse.cc @@ -29,1825 +29,3816 @@ * ***************************************************************************/ +#include "XML_Parse.h" -#include #include "xmlParser.h" -#include -#include "XML_Parse.h" + #include +#include +#include using namespace std; -void ParseXML::parse(char* filepath) -{ - unsigned int i,j,k,m,n; - unsigned int NumofCom_4; - unsigned int itmp; - //Initialize all structures - ParseXML::initialize(); - - // this open and parse the XML file: - XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer - - XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer - //get all params in the second layer - itmp=xNode2.nChildNode("param"); - for(i=0; iOrderofComponents_3layer) - { - //___________________________get all system.core0-n________________________________________________ - if (sys.homogeneous_cores==1) OrderofComponents_3layer=0; - else OrderofComponents_3layer=sys.number_of_cores-1; - for (i=0; i<=OrderofComponents_3layer; i++) - { - xNode3=xNode2.getChildNode("component",i); - if (xNode3.isEmpty()==1) { - printf("The value of homogeneous_cores or number_of_cores is not correct!"); - exit(0); - } - else{ - if (strstr(xNode3.getAttribute("name"),"core")!=NULL) - { - { //For cpu0-cpui - //Get all params with system.core? 
- itmp=xNode3.nChildNode("param"); - for(k=0; k OrderofComponents_3layer) { + //___________________________get all + // system.core0-n________________________________________________ + if (sys.homogeneous_cores == 1) + OrderofComponents_3layer = 0; + else + OrderofComponents_3layer = sys.number_of_cores - 1; + for (i = 0; i <= OrderofComponents_3layer; i++) { + xNode3 = xNode2.getChildNode("component", i); + if (xNode3.isEmpty() == 1) { + printf("The value of homogeneous_cores or number_of_cores is not " + "correct!"); + exit(0); + } else { + if (strstr(xNode3.getAttribute("name"), "core") != NULL) { + { // For cpu0-cpui + // Get all params with system.core? + itmp = xNode3.nChildNode("param"); + for (k = 0; k < itmp; k++) { + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "clock_rate") == 0) { + sys.core[i].clock_rate = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "opt_local") == 0) { + sys.core[i].opt_local = (bool)atoi( + xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "x86") == 0) { + sys.core[i].x86 = (bool)atoi( + xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "machine_bits") == 0) { + sys.core[i].machine_bits = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "virtual_address_width") == 0) { + sys.core[i].virtual_address_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "physical_address_width") == 0) { + sys.core[i].physical_address_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if 
(strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "instruction_length") == 0) { + sys.core[i].instruction_length = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "opcode_width") == 0) { + sys.core[i].opcode_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "micro_opcode_width") == 0) { + sys.core[i].micro_opcode_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "machine_type") == 0) { + sys.core[i].machine_type = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "internal_datapath_width") == 0) { + sys.core[i].internal_datapath_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_hardware_threads") == 0) { + sys.core[i].number_hardware_threads = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "fetch_width") == 0) { + sys.core[i].fetch_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_instruction_fetch_ports") == 0) { + sys.core[i].number_instruction_fetch_ports = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "decode_width") == 0) { + sys.core[i].decode_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "issue_width") == 0) { + 
sys.core[i].issue_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "peak_issue_width") == 0) { + sys.core[i].peak_issue_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "commit_width") == 0) { + sys.core[i].commit_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "fp_issue_width") == 0) { + sys.core[i].fp_issue_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "prediction_width") == 0) { + sys.core[i].prediction_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "vdd") == 0) { + sys.core[i].vdd = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "power_gating_vcc") == 0) { + sys.core[i].power_gating_vcc = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } - for (i=0; i<(OrderofComponents_3layer-tmpOrderofComponents_3layer); i++) - { - xNode3=xNode2.getChildNode("component",w); - if (xNode3.isEmpty()==1) { - printf("The value of homogeneous_L2Directories or number_of_L2Directories is not correct!"); - exit(0); - } - else - { - if (strstr(xNode3.getAttribute("id"),"L2Directory")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; -// xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); -// if (xNode3.isEmpty()==1) { -// printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); -// exit(0); -// } -// 
if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL) -// { -// -// itmp=xNode3.nChildNode("param"); -// for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) + // OrderofComponents_3layer=OrderofComponents_3layer+1; + // xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); + // if (xNode3.isEmpty()==1) { + // printf("some value(s) of + // number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not + // correct!"); exit(0); + // } + // if 
(strstr(xNode3.getAttribute("id"),"system.mem")!=NULL) + // { + // + // itmp=xNode3.nChildNode("param"); + // for(k=0; k 0) + OrderofComponents_3layer = OrderofComponents_3layer + 1; + xNode3 = xNode2.getChildNode("component", OrderofComponents_3layer); + if (xNode3.isEmpty() == 1) { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + if (strstr(xNode3.getAttribute("id"), "system.mc") != NULL) { + itmp = xNode3.nChildNode("param"); + for (k = 0; k < itmp; k++) { // get all items of param in system.mem + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "mc_clock") == 0) { + sys.mc.mc_clock = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "vdd") == 0) { + sys.mc.vdd = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "power_gating_vcc") == 0) { + sys.mc.power_gating_vcc = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "block_size") == 0) { + sys.mc.llc_line_length = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_mcs") == 0) { + sys.mc.number_mcs = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "memory_channels_per_mc") == 0) { + sys.mc.memory_channels_per_mc = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "req_window_size_per_channel") == 0) { + sys.mc.req_window_size_per_channel = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if 
(strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "IO_buffer_size_per_channel") == 0) { + sys.mc.IO_buffer_size_per_channel = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "databus_width") == 0) { + sys.mc.databus_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "addressbus_width") == 0) { + sys.mc.addressbus_width = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "peak_transfer_rate") == 0) { + sys.mc.peak_transfer_rate = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_ranks") == 0) { + sys.mc.number_ranks = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "LVDS") == 0) { + sys.mc.LVDS = + (bool)atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "type") == 0) { + sys.mc.type = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "withPHY") == 0) { + sys.mc.withPHY = + (bool)atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + } + itmp = xNode3.nChildNode("stat"); + for (k = 0; k < itmp; + k++) { // get all items of stat in system.mendirectory + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "memory_accesses") == 0) { + sys.mc.memory_accesses = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "memory_reads") == 0) { + 
sys.mc.memory_reads = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "memory_writes") == 0) { + sys.mc.memory_writes = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + } + } else { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + //__________________________________________Get + // system.niu____________________________________________ + if (OrderofComponents_3layer > 0) + OrderofComponents_3layer = OrderofComponents_3layer + 1; + xNode3 = xNode2.getChildNode("component", OrderofComponents_3layer); + if (xNode3.isEmpty() == 1) { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + if (strstr(xNode3.getAttribute("id"), "system.niu") != NULL) { + itmp = xNode3.nChildNode("param"); + for (k = 0; k < itmp; k++) { // get all items of param in system.mem + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "clockrate") == 0) { + sys.niu.clockrate = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_units") == 0) { + sys.niu.number_units = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "type") == 0) { + sys.niu.type = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "vdd") == 0) { + sys.niu.vdd = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "power_gating_vcc") == 0) { + sys.niu.power_gating_vcc = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + 
continue; + } + } + itmp = xNode3.nChildNode("stat"); + for (k = 0; k < itmp; + k++) { // get all items of stat in system.mendirectory + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "duty_cycle") == 0) { + sys.niu.duty_cycle = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "total_load_perc") == 0) { + sys.niu.total_load_perc = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + } + } else { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } - } + //__________________________________________Get + // system.pcie____________________________________________ + if (OrderofComponents_3layer > 0) + OrderofComponents_3layer = OrderofComponents_3layer + 1; + xNode3 = xNode2.getChildNode("component", OrderofComponents_3layer); + if (xNode3.isEmpty() == 1) { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + if (strstr(xNode3.getAttribute("id"), "system.pcie") != NULL) { + itmp = xNode3.nChildNode("param"); + for (k = 0; k < itmp; k++) { // get all items of param in system.mem + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "clockrate") == 0) { + sys.pcie.clockrate = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_units") == 0) { + sys.pcie.number_units = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "num_channels") == 0) { + sys.pcie.num_channels = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "type") == 0) { + sys.pcie.type = + 
atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "vdd") == 0) { + sys.pcie.vdd = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "power_gating_vcc") == 0) { + sys.pcie.power_gating_vcc = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "withPHY") == 0) { + sys.pcie.withPHY = + (bool)atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + } + itmp = xNode3.nChildNode("stat"); + for (k = 0; k < itmp; + k++) { // get all items of stat in system.mendirectory + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "duty_cycle") == 0) { + sys.pcie.duty_cycle = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "total_load_perc") == 0) { + sys.pcie.total_load_perc = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + } + } else { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + //__________________________________________Get + // system.flashcontroller____________________________________________ + if (OrderofComponents_3layer > 0) + OrderofComponents_3layer = OrderofComponents_3layer + 1; + xNode3 = xNode2.getChildNode("component", OrderofComponents_3layer); + if (xNode3.isEmpty() == 1) { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + if (strstr(xNode3.getAttribute("id"), "system.flashc") != NULL) { + itmp = xNode3.nChildNode("param"); + for (k = 0; k < itmp; k++) { // get all items of param in system.mem + // if + 
//(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"flashc_clock")==0) + //{sys.flashc.mc_clock=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"block_size")==0) + //{sys.flashc.llc_line_length=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "number_flashcs") == 0) { + sys.flashc.number_mcs = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "vdd") == 0) { + sys.flashc.vdd = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "power_gating_vcc") == 0) { + sys.flashc.power_gating_vcc = + atof(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"memory_channels_per_flashc")==0) + //{sys.flashc.memory_channels_per_mc=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"req_window_size_per_channel")==0) + //{sys.flashc.req_window_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"IO_buffer_size_per_channel")==0) + //{sys.flashc.IO_buffer_size_per_channel=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"databus_width")==0) + //{sys.flashc.databus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"addressbus_width")==0) + 
//{sys.flashc.addressbus_width=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "peak_transfer_rate") == 0) { + sys.flashc.peak_transfer_rate = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"number_ranks")==0) + //{sys.flashc.number_ranks=atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("param",k).getAttribute("name"),"LVDS")==0) + //{sys.flashc.LVDS=(bool)atoi(xNode3.getChildNode("param",k).getAttribute("value"));continue;} + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "type") == 0) { + sys.flashc.type = + atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("param", k).getAttribute("name"), + "withPHY") == 0) { + sys.flashc.withPHY = + (bool)atoi(xNode3.getChildNode("param", k).getAttribute("value")); + continue; + } + } + itmp = xNode3.nChildNode("stat"); + for (k = 0; k < itmp; + k++) { // get all items of stat in system.mendirectory + // if + //(strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_accesses")==0) + //{sys.flashc.memory_accesses=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_reads")==0) + //{sys.flashc.memory_reads=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;} + // if + //(strcmp(xNode3.getChildNode("stat",k).getAttribute("name"),"memory_writes")==0) + //{sys.flashc.memory_writes=atof(xNode3.getChildNode("stat",k).getAttribute("value"));continue;} + if (strcmp(xNode3.getChildNode("stat", k).getAttribute("name"), + "duty_cycle") == 0) { + sys.flashc.duty_cycle = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + if (strcmp(xNode3.getChildNode("stat", 
k).getAttribute("name"), + "total_load_perc") == 0) { + sys.flashc.total_load_perc = + atof(xNode3.getChildNode("stat", k).getAttribute("value")); + continue; + } + } + } else { + printf("some value(s) of " + "number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs " + "is/are not correct!"); + exit(0); + } + } } -void ParseXML::initialize() //Initialize all +void ParseXML::initialize() // Initialize all { - //All number_of_* at the level of 'system' 03/21/2009 - sys.number_of_cores=1; - sys.number_of_L1Directories=1; - sys.number_of_L2Directories=1; - sys.number_of_L2s=1; - sys.Private_L2 = false; - sys.number_of_L3s=1; - sys.number_of_NoCs=1; - // All params at the level of 'system' - //strcpy(sys.homogeneous_cores,"default"); - sys.core_tech_node=1; - sys.target_core_clockrate=1; - sys.target_chip_area=1; - sys.temperature=360; - sys.number_cache_levels=1; - sys.homogeneous_cores=1; - sys.homogeneous_L1Directories=1; - sys.homogeneous_L2Directories=1; - sys.homogeneous_L2s=1; - sys.homogeneous_L3s=1; - sys.homogeneous_NoCs=1; - sys.homogeneous_ccs=1; + // All number_of_* at the level of 'system' 03/21/2009 + sys.number_of_cores = 1; + sys.number_of_L1Directories = 1; + sys.number_of_L2Directories = 1; + sys.number_of_L2s = 1; + sys.Private_L2 = false; + sys.number_of_L3s = 1; + sys.number_of_NoCs = 1; + // All params at the level of 'system' + // strcpy(sys.homogeneous_cores,"default"); + sys.core_tech_node = 1; + sys.target_core_clockrate = 1; + sys.target_chip_area = 1; + sys.temperature = 360; + sys.number_cache_levels = 1; + sys.homogeneous_cores = 1; + sys.homogeneous_L1Directories = 1; + sys.homogeneous_L2Directories = 1; + sys.homogeneous_L2s = 1; + sys.homogeneous_L3s = 1; + sys.homogeneous_NoCs = 1; + sys.homogeneous_ccs = 1; - sys.Max_area_deviation=1; - sys.Max_power_deviation=1; - sys.device_type=1; - sys.longer_channel_device =true; - sys.power_gating =false; - sys.Embedded =false; - sys.opt_dynamic_power=false; - sys.opt_lakage_power=false; - 
sys.opt_clockrate=true; - sys.opt_area=false; - sys.interconnect_projection_type=1; - sys.vdd =0; - sys.power_gating_vcc = -1; - int i,j; - for (i=0; i<=63; i++) - { - sys.core[i].vdd =0; - sys.core[i].power_gating_vcc = -1; - sys.core[i].clock_rate=1; - sys.core[i].opt_local = true; - sys.core[i].x86 = false; - sys.core[i].machine_bits=1; - sys.core[i].virtual_address_width=1; - sys.core[i].physical_address_width=1; - sys.core[i].opcode_width=1; - sys.core[i].micro_opcode_width=1; - //strcpy(sys.core[i].machine_type,"default"); - sys.core[i].internal_datapath_width=1; - sys.core[i].number_hardware_threads=1; - sys.core[i].fetch_width=1; - sys.core[i].number_instruction_fetch_ports=1; - sys.core[i].decode_width=1; - sys.core[i].issue_width=1; - sys.core[i].peak_issue_width=1; - sys.core[i].commit_width=1; - for (j=0; j<20; j++) sys.core[i].pipelines_per_core[j]=1; - for (j=0; j<20; j++) sys.core[i].pipeline_depth[j]=1; - strcpy(sys.core[i].FPU,"default"); - strcpy(sys.core[i]. divider_multiplier,"default"); - sys.core[i].ALU_per_core=1; - sys.core[i].FPU_per_core=1.0; - sys.core[i].MUL_per_core=1; - sys.core[i].instruction_buffer_size=1; - sys.core[i].decoded_stream_buffer_size=1; - //strcpy(sys.core[i].instruction_window_scheme,"default"); - sys.core[i].instruction_window_size=1; - sys.core[i].ROB_size=1; - sys.core[i].archi_Regs_IRF_size=1; - sys.core[i].archi_Regs_FRF_size=1; - sys.core[i].phy_Regs_IRF_size=1; - sys.core[i].phy_Regs_FRF_size=1; - //strcpy(sys.core[i].rename_scheme,"default"); - sys.core[i].checkpoint_depth=1; - sys.core[i].register_windows_size=1; - strcpy(sys.core[i].LSU_order,"default"); - sys.core[i].store_buffer_size=1; - sys.core[i].load_buffer_size=1; - sys.core[i].memory_ports=1; - strcpy(sys.core[i].Dcache_dual_pump,"default"); - sys.core[i].RAS_size=1; - //all stats at the level of system.core(0-n) - sys.core[i].total_instructions=1; - sys.core[i].int_instructions=1; - sys.core[i].fp_instructions=1; - sys.core[i].branch_instructions=1; 
- sys.core[i].branch_mispredictions=1; - sys.core[i].committed_instructions=1; - sys.core[i].load_instructions=1; - sys.core[i].store_instructions=1; - sys.core[i].total_cycles=1; - sys.core[i].idle_cycles=1; - sys.core[i].busy_cycles=1; - sys.core[i].instruction_buffer_reads=1; - sys.core[i].instruction_buffer_write=1; - sys.core[i].ROB_reads=1; - sys.core[i].ROB_writes=1; - sys.core[i].rename_accesses=1; - sys.core[i].inst_window_reads=1; - sys.core[i].inst_window_writes=1; - sys.core[i].inst_window_wakeup_accesses=1; - sys.core[i].inst_window_selections=1; - sys.core[i].archi_int_regfile_reads=1; - sys.core[i].archi_float_regfile_reads=1; - sys.core[i].phy_int_regfile_reads=1; - sys.core[i].phy_float_regfile_reads=1; - sys.core[i].windowed_reg_accesses=1; - sys.core[i].windowed_reg_transports=1; - sys.core[i].function_calls=1; - sys.core[i].ialu_accesses=1; - sys.core[i].fpu_accesses=1; - sys.core[i].mul_accesses=1; - sys.core[i].cdb_alu_accesses=1; - sys.core[i].cdb_mul_accesses=1; - sys.core[i].cdb_fpu_accesses=1; - sys.core[i].load_buffer_reads=1; - sys.core[i].load_buffer_writes=1; - sys.core[i].load_buffer_cams=1; - sys.core[i].store_buffer_reads=1; - sys.core[i].store_buffer_writes=1; - sys.core[i].store_buffer_cams=1; - sys.core[i].store_buffer_forwards=1; - sys.core[i].main_memory_access=1; - sys.core[i].main_memory_read=1; - sys.core[i].main_memory_write=1; - sys.core[i].IFU_duty_cycle = 1; - sys.core[i].BR_duty_cycle = 1; - sys.core[i].LSU_duty_cycle = 1; - sys.core[i].MemManU_I_duty_cycle =1; - sys.core[i].MemManU_D_duty_cycle =1; - sys.core[i].ALU_duty_cycle =1; - sys.core[i].MUL_duty_cycle =1; - sys.core[i].FPU_duty_cycle =1; - sys.core[i].ALU_cdb_duty_cycle =1; - sys.core[i].MUL_cdb_duty_cycle =1; - sys.core[i].FPU_cdb_duty_cycle =1; - //system.core?.predictor - sys.core[i].predictor.prediction_width=1; - strcpy(sys.core[i].predictor.prediction_scheme,"default"); - sys.core[i].predictor.predictor_size=1; - sys.core[i].predictor.predictor_entries=1; 
- sys.core[i].predictor.local_predictor_entries=1; - for (j=0; j<20; j++) sys.core[i].predictor.local_predictor_size[j]=1; - sys.core[i].predictor.global_predictor_entries=1; - sys.core[i].predictor.global_predictor_bits=1; - sys.core[i].predictor.chooser_predictor_entries=1; - sys.core[i].predictor.chooser_predictor_bits=1; - sys.core[i].predictor.predictor_accesses=1; - //system.core?.itlb - sys.core[i].itlb.number_entries=1; - sys.core[i].itlb.total_hits=1; - sys.core[i].itlb.total_accesses=1; - sys.core[i].itlb.total_misses=1; - //system.core?.icache - for (j=0; j<20; j++) sys.core[i].icache.icache_config[j]=1; - //strcpy(sys.core[i].icache.buffer_sizes,"default"); - sys.core[i].icache.total_accesses=1; - sys.core[i].icache.read_accesses=1; - sys.core[i].icache.read_misses=1; - sys.core[i].icache.replacements=1; - sys.core[i].icache.read_hits=1; - sys.core[i].icache.total_hits=1; - sys.core[i].icache.total_misses=1; - sys.core[i].icache.miss_buffer_access=1; - sys.core[i].icache.fill_buffer_accesses=1; - sys.core[i].icache.prefetch_buffer_accesses=1; - sys.core[i].icache.prefetch_buffer_writes=1; - sys.core[i].icache.prefetch_buffer_reads=1; - sys.core[i].icache.prefetch_buffer_hits=1; - //system.core?.dtlb - sys.core[i].dtlb.number_entries=1; - sys.core[i].dtlb.total_accesses=1; - sys.core[i].dtlb.read_accesses=1; - sys.core[i].dtlb.write_accesses=1; - sys.core[i].dtlb.write_hits=1; - sys.core[i].dtlb.read_hits=1; - sys.core[i].dtlb.read_misses=1; - sys.core[i].dtlb.write_misses=1; - sys.core[i].dtlb.total_hits=1; - sys.core[i].dtlb.total_misses=1; - //system.core?.dcache - for (j=0; j<20; j++) sys.core[i].dcache.dcache_config[j]=1; - //strcpy(sys.core[i].dcache.buffer_sizes,"default"); - sys.core[i].dcache.total_accesses=1; - sys.core[i].dcache.read_accesses=1; - sys.core[i].dcache.write_accesses=1; - sys.core[i].dcache.total_hits=1; - sys.core[i].dcache.total_misses=1; - sys.core[i].dcache.read_hits=1; - sys.core[i].dcache.write_hits=1; - 
sys.core[i].dcache.read_misses=1; - sys.core[i].dcache.write_misses=1; - sys.core[i].dcache.replacements=1; - sys.core[i].dcache.write_backs=1; - sys.core[i].dcache.miss_buffer_access=1; - sys.core[i].dcache.fill_buffer_accesses=1; - sys.core[i].dcache.prefetch_buffer_accesses=1; - sys.core[i].dcache.prefetch_buffer_writes=1; - sys.core[i].dcache.prefetch_buffer_reads=1; - sys.core[i].dcache.prefetch_buffer_hits=1; - sys.core[i].dcache.wbb_writes=1; - sys.core[i].dcache.wbb_reads=1; - //system.core?.BTB - for (j=0; j<20; j++) sys.core[i].BTB.BTB_config[j]=1; - sys.core[i].BTB.total_accesses=1; - sys.core[i].BTB.read_accesses=1; - sys.core[i].BTB.write_accesses=1; - sys.core[i].BTB.total_hits=1; - sys.core[i].BTB.total_misses=1; - sys.core[i].BTB.read_hits=1; - sys.core[i].BTB.write_hits=1; - sys.core[i].BTB.read_misses=1; - sys.core[i].BTB.write_misses=1; - sys.core[i].BTB.replacements=1; - } + sys.Max_area_deviation = 1; + sys.Max_power_deviation = 1; + sys.device_type = 1; + sys.longer_channel_device = true; + sys.power_gating = false; + sys.Embedded = false; + sys.opt_dynamic_power = false; + sys.opt_lakage_power = false; + sys.opt_clockrate = true; + sys.opt_area = false; + sys.interconnect_projection_type = 1; + sys.vdd = 0; + sys.power_gating_vcc = -1; + int i, j; + for (i = 0; i <= 63; i++) { + sys.core[i].vdd = 0; + sys.core[i].power_gating_vcc = -1; + sys.core[i].clock_rate = 1; + sys.core[i].opt_local = true; + sys.core[i].x86 = false; + sys.core[i].machine_bits = 1; + sys.core[i].virtual_address_width = 1; + sys.core[i].physical_address_width = 1; + sys.core[i].opcode_width = 1; + sys.core[i].micro_opcode_width = 1; + // strcpy(sys.core[i].machine_type,"default"); + sys.core[i].internal_datapath_width = 1; + sys.core[i].number_hardware_threads = 1; + sys.core[i].fetch_width = 1; + sys.core[i].number_instruction_fetch_ports = 1; + sys.core[i].decode_width = 1; + sys.core[i].issue_width = 1; + sys.core[i].peak_issue_width = 1; + sys.core[i].commit_width = 
1; + for (j = 0; j < 20; j++) + sys.core[i].pipelines_per_core[j] = 1; + for (j = 0; j < 20; j++) + sys.core[i].pipeline_depth[j] = 1; + strcpy(sys.core[i].FPU, "default"); + strcpy(sys.core[i].divider_multiplier, "default"); + sys.core[i].ALU_per_core = 1; + sys.core[i].FPU_per_core = 1.0; + sys.core[i].MUL_per_core = 1; + sys.core[i].instruction_buffer_size = 1; + sys.core[i].decoded_stream_buffer_size = 1; + // strcpy(sys.core[i].instruction_window_scheme,"default"); + sys.core[i].instruction_window_size = 1; + sys.core[i].ROB_size = 1; + sys.core[i].archi_Regs_IRF_size = 1; + sys.core[i].archi_Regs_FRF_size = 1; + sys.core[i].phy_Regs_IRF_size = 1; + sys.core[i].phy_Regs_FRF_size = 1; + // strcpy(sys.core[i].rename_scheme,"default"); + sys.core[i].checkpoint_depth = 1; + sys.core[i].register_windows_size = 1; + strcpy(sys.core[i].LSU_order, "default"); + sys.core[i].store_buffer_size = 1; + sys.core[i].load_buffer_size = 1; + sys.core[i].memory_ports = 1; + strcpy(sys.core[i].Dcache_dual_pump, "default"); + sys.core[i].RAS_size = 1; + // all stats at the level of system.core(0-n) + sys.core[i].total_instructions = 1; + sys.core[i].int_instructions = 1; + sys.core[i].fp_instructions = 1; + sys.core[i].branch_instructions = 1; + sys.core[i].branch_mispredictions = 1; + sys.core[i].committed_instructions = 1; + sys.core[i].load_instructions = 1; + sys.core[i].store_instructions = 1; + sys.core[i].total_cycles = 1; + sys.core[i].idle_cycles = 1; + sys.core[i].busy_cycles = 1; + sys.core[i].instruction_buffer_reads = 1; + sys.core[i].instruction_buffer_write = 1; + sys.core[i].ROB_reads = 1; + sys.core[i].ROB_writes = 1; + sys.core[i].rename_accesses = 1; + sys.core[i].inst_window_reads = 1; + sys.core[i].inst_window_writes = 1; + sys.core[i].inst_window_wakeup_accesses = 1; + sys.core[i].inst_window_selections = 1; + sys.core[i].archi_int_regfile_reads = 1; + sys.core[i].archi_float_regfile_reads = 1; + sys.core[i].phy_int_regfile_reads = 1; + 
sys.core[i].phy_float_regfile_reads = 1; + sys.core[i].windowed_reg_accesses = 1; + sys.core[i].windowed_reg_transports = 1; + sys.core[i].function_calls = 1; + sys.core[i].ialu_accesses = 1; + sys.core[i].fpu_accesses = 1; + sys.core[i].mul_accesses = 1; + sys.core[i].cdb_alu_accesses = 1; + sys.core[i].cdb_mul_accesses = 1; + sys.core[i].cdb_fpu_accesses = 1; + sys.core[i].load_buffer_reads = 1; + sys.core[i].load_buffer_writes = 1; + sys.core[i].load_buffer_cams = 1; + sys.core[i].store_buffer_reads = 1; + sys.core[i].store_buffer_writes = 1; + sys.core[i].store_buffer_cams = 1; + sys.core[i].store_buffer_forwards = 1; + sys.core[i].main_memory_access = 1; + sys.core[i].main_memory_read = 1; + sys.core[i].main_memory_write = 1; + sys.core[i].IFU_duty_cycle = 1; + sys.core[i].BR_duty_cycle = 1; + sys.core[i].LSU_duty_cycle = 1; + sys.core[i].MemManU_I_duty_cycle = 1; + sys.core[i].MemManU_D_duty_cycle = 1; + sys.core[i].ALU_duty_cycle = 1; + sys.core[i].MUL_duty_cycle = 1; + sys.core[i].FPU_duty_cycle = 1; + sys.core[i].ALU_cdb_duty_cycle = 1; + sys.core[i].MUL_cdb_duty_cycle = 1; + sys.core[i].FPU_cdb_duty_cycle = 1; + // system.core?.predictor + sys.core[i].predictor.prediction_width = 1; + strcpy(sys.core[i].predictor.prediction_scheme, "default"); + sys.core[i].predictor.predictor_size = 1; + sys.core[i].predictor.predictor_entries = 1; + sys.core[i].predictor.local_predictor_entries = 1; + for (j = 0; j < 20; j++) + sys.core[i].predictor.local_predictor_size[j] = 1; + sys.core[i].predictor.global_predictor_entries = 1; + sys.core[i].predictor.global_predictor_bits = 1; + sys.core[i].predictor.chooser_predictor_entries = 1; + sys.core[i].predictor.chooser_predictor_bits = 1; + sys.core[i].predictor.predictor_accesses = 1; + // system.core?.itlb + sys.core[i].itlb.number_entries = 1; + sys.core[i].itlb.total_hits = 1; + sys.core[i].itlb.total_accesses = 1; + sys.core[i].itlb.total_misses = 1; + // system.core?.icache + for (j = 0; j < 20; j++) + 
sys.core[i].icache.icache_config[j] = 1; + // strcpy(sys.core[i].icache.buffer_sizes,"default"); + sys.core[i].icache.total_accesses = 1; + sys.core[i].icache.read_accesses = 1; + sys.core[i].icache.read_misses = 1; + sys.core[i].icache.replacements = 1; + sys.core[i].icache.read_hits = 1; + sys.core[i].icache.total_hits = 1; + sys.core[i].icache.total_misses = 1; + sys.core[i].icache.miss_buffer_access = 1; + sys.core[i].icache.fill_buffer_accesses = 1; + sys.core[i].icache.prefetch_buffer_accesses = 1; + sys.core[i].icache.prefetch_buffer_writes = 1; + sys.core[i].icache.prefetch_buffer_reads = 1; + sys.core[i].icache.prefetch_buffer_hits = 1; + // system.core?.dtlb + sys.core[i].dtlb.number_entries = 1; + sys.core[i].dtlb.total_accesses = 1; + sys.core[i].dtlb.read_accesses = 1; + sys.core[i].dtlb.write_accesses = 1; + sys.core[i].dtlb.write_hits = 1; + sys.core[i].dtlb.read_hits = 1; + sys.core[i].dtlb.read_misses = 1; + sys.core[i].dtlb.write_misses = 1; + sys.core[i].dtlb.total_hits = 1; + sys.core[i].dtlb.total_misses = 1; + // system.core?.dcache + for (j = 0; j < 20; j++) + sys.core[i].dcache.dcache_config[j] = 1; + // strcpy(sys.core[i].dcache.buffer_sizes,"default"); + sys.core[i].dcache.total_accesses = 1; + sys.core[i].dcache.read_accesses = 1; + sys.core[i].dcache.write_accesses = 1; + sys.core[i].dcache.total_hits = 1; + sys.core[i].dcache.total_misses = 1; + sys.core[i].dcache.read_hits = 1; + sys.core[i].dcache.write_hits = 1; + sys.core[i].dcache.read_misses = 1; + sys.core[i].dcache.write_misses = 1; + sys.core[i].dcache.replacements = 1; + sys.core[i].dcache.write_backs = 1; + sys.core[i].dcache.miss_buffer_access = 1; + sys.core[i].dcache.fill_buffer_accesses = 1; + sys.core[i].dcache.prefetch_buffer_accesses = 1; + sys.core[i].dcache.prefetch_buffer_writes = 1; + sys.core[i].dcache.prefetch_buffer_reads = 1; + sys.core[i].dcache.prefetch_buffer_hits = 1; + sys.core[i].dcache.wbb_writes = 1; + sys.core[i].dcache.wbb_reads = 1; + // 
system.core?.BTB + for (j = 0; j < 20; j++) + sys.core[i].BTB.BTB_config[j] = 1; + sys.core[i].BTB.total_accesses = 1; + sys.core[i].BTB.read_accesses = 1; + sys.core[i].BTB.write_accesses = 1; + sys.core[i].BTB.total_hits = 1; + sys.core[i].BTB.total_misses = 1; + sys.core[i].BTB.read_hits = 1; + sys.core[i].BTB.write_hits = 1; + sys.core[i].BTB.read_misses = 1; + sys.core[i].BTB.write_misses = 1; + sys.core[i].BTB.replacements = 1; + } - //system_L1directory - for (i=0; i<=63; i++) - { + // system_L1directory + for (i = 0; i <= 63; i++) { - for (j=0; j<20; j++) sys.L1Directory[i].Dir_config[j]=1; - for (j=0; j<20; j++) sys.L1Directory[i].buffer_sizes[j]=1; - sys.L1Directory[i].clockrate=1; - sys.L1Directory[i].ports[20]=1; - sys.L1Directory[i].device_type=1; - sys.L1Directory[i].vdd =0; - sys.L1Directory[i].power_gating_vcc = -1; - strcpy(sys.L1Directory[i].threeD_stack,"default"); - sys.L1Directory[i].total_accesses=1; - sys.L1Directory[i].read_accesses=1; - sys.L1Directory[i].write_accesses=1; - sys.L1Directory[i].duty_cycle =1; - } - //system_L2directory - for (i=0; i<=63; i++) - { - for (j=0; j<20; j++) sys.L2Directory[i].Dir_config[j]=1; - for (j=0; j<20; j++) sys.L2Directory[i].buffer_sizes[j]=1; - sys.L2Directory[i].clockrate=1; - sys.L2Directory[i].ports[20]=1; - sys.L2Directory[i].device_type=1; - sys.L2Directory[i].vdd =0; - sys.L2Directory[i].power_gating_vcc = -1; - strcpy(sys.L2Directory[i].threeD_stack,"default"); - sys.L2Directory[i].total_accesses=1; - sys.L2Directory[i].read_accesses=1; - sys.L2Directory[i].write_accesses=1; - sys.L2Directory[i].duty_cycle =1; - } - for (i=0; i<=63; i++) - { - //system_L2 - for (j=0; j<20; j++) sys.L2[i].L2_config[j]=1; - sys.L2[i].clockrate=1; - for (j=0; j<20; j++) sys.L2[i].ports[j]=1; - sys.L2[i].device_type=1; - sys.L2[i].vdd =0; - sys.L2[i].power_gating_vcc = -1; - strcpy(sys.L2[i].threeD_stack,"default"); - for (j=0; j<20; j++) sys.L2[i].buffer_sizes[j]=1; - sys.L2[i].total_accesses=1; - 
sys.L2[i].read_accesses=1; - sys.L2[i].write_accesses=1; - sys.L2[i].total_hits=1; - sys.L2[i].total_misses=1; - sys.L2[i].read_hits=1; - sys.L2[i].write_hits=1; - sys.L2[i].read_misses=1; - sys.L2[i].write_misses=1; - sys.L2[i].replacements=1; - sys.L2[i].write_backs=1; - sys.L2[i].miss_buffer_accesses=1; - sys.L2[i].fill_buffer_accesses=1; - sys.L2[i].prefetch_buffer_accesses=1; - sys.L2[i].prefetch_buffer_writes=1; - sys.L2[i].prefetch_buffer_reads=1; - sys.L2[i].prefetch_buffer_hits=1; - sys.L2[i].wbb_writes=1; - sys.L2[i].wbb_reads=1; - sys.L2[i].duty_cycle =1; - sys.L2[i].merged_dir=false; - sys.L2[i].homenode_read_accesses =1; - sys.L2[i].homenode_write_accesses=1; - sys.L2[i].homenode_read_hits=1; - sys.L2[i].homenode_write_hits=1; - sys.L2[i].homenode_read_misses=1; - sys.L2[i].homenode_write_misses=1; - sys.L2[i].dir_duty_cycle=1; - } - for (i=0; i<=63; i++) - { - //system_L3 - for (j=0; j<20; j++) sys.L3[i].L3_config[j]=1; - sys.L3[i].clockrate=1; - for (j=0; j<20; j++) sys.L3[i].ports[j]=1; - sys.L3[i].device_type=1; - sys.L3[i].vdd =0; - sys.L2[i].power_gating_vcc = -1; - strcpy(sys.L3[i].threeD_stack,"default"); - for (j=0; j<20; j++) sys.L3[i].buffer_sizes[j]=1; - sys.L3[i].total_accesses=1; - sys.L3[i].read_accesses=1; - sys.L3[i].write_accesses=1; - sys.L3[i].total_hits=1; - sys.L3[i].total_misses=1; - sys.L3[i].read_hits=1; - sys.L3[i].write_hits=1; - sys.L3[i].read_misses=1; - sys.L3[i].write_misses=1; - sys.L3[i].replacements=1; - sys.L3[i].write_backs=1; - sys.L3[i].miss_buffer_accesses=1; - sys.L3[i].fill_buffer_accesses=1; - sys.L3[i].prefetch_buffer_accesses=1; - sys.L3[i].prefetch_buffer_writes=1; - sys.L3[i].prefetch_buffer_reads=1; - sys.L3[i].prefetch_buffer_hits=1; - sys.L3[i].wbb_writes=1; - sys.L3[i].wbb_reads=1; - sys.L3[i].duty_cycle =1; - sys.L3[i].merged_dir=false; - sys.L3[i].homenode_read_accesses =1; - sys.L3[i].homenode_write_accesses=1; - sys.L3[i].homenode_read_hits=1; - sys.L3[i].homenode_write_hits=1; - 
sys.L3[i].homenode_read_misses=1; - sys.L3[i].homenode_write_misses=1; - sys.L3[i].dir_duty_cycle=1; - } - //system_NoC - for (i=0; i<=63; i++) - { - sys.NoC[i].clockrate=1; - sys.NoC[i].type=true; - sys.NoC[i].chip_coverage=1; - sys.NoC[i].vdd =0; - sys.NoC[i].power_gating_vcc = -1; - sys.NoC[i].has_global_link = true; - strcpy(sys.NoC[i].topology,"default"); - sys.NoC[i].horizontal_nodes=1; - sys.NoC[i].vertical_nodes=1; - sys.NoC[i].input_ports=1; - sys.NoC[i].output_ports=1; - sys.NoC[i].virtual_channel_per_port=1; - sys.NoC[i].flit_bits=1; - sys.NoC[i].input_buffer_entries_per_vc=1; - sys.NoC[i].total_accesses=1; - sys.NoC[i].duty_cycle=1; - sys.NoC[i].route_over_perc = 0.5; - for (j=0; j<20; j++) sys.NoC[i].ports_of_input_buffer[j]=1; - sys.NoC[i].number_of_crossbars=1; - strcpy(sys.NoC[i].crossbar_type,"default"); - strcpy(sys.NoC[i].crosspoint_type,"default"); - //system.NoC?.xbar0; - sys.NoC[i].xbar0.number_of_inputs_of_crossbars=1; - sys.NoC[i].xbar0.number_of_outputs_of_crossbars=1; - sys.NoC[i].xbar0.flit_bits=1; - sys.NoC[i].xbar0.input_buffer_entries_per_port=1; - sys.NoC[i].xbar0.ports_of_input_buffer[20]=1; - sys.NoC[i].xbar0.crossbar_accesses=1; - } - //system_mem - sys.mem.mem_tech_node=1; - sys.mem.device_clock=1; - sys.mem.capacity_per_channel=1; - sys.mem.number_ranks=1; - sys.mem.peak_transfer_rate =1; - sys.mem.num_banks_of_DRAM_chip=1; - sys.mem.Block_width_of_DRAM_chip=1; - sys.mem.output_width_of_DRAM_chip=1; - sys.mem.page_size_of_DRAM_chip=1; - sys.mem.burstlength_of_DRAM_chip=1; - sys.mem.internal_prefetch_of_DRAM_chip=1; - sys.mem.memory_accesses=1; - sys.mem.memory_reads=1; - sys.mem.memory_writes=1; - //system_mc - sys.mc.mc_clock =1; - sys.mc.number_mcs=1; - sys.mc.peak_transfer_rate =1; - sys.mc.memory_channels_per_mc=1; - sys.mc.number_ranks=1; - sys.mc.req_window_size_per_channel=1; - sys.mc.IO_buffer_size_per_channel=1; - sys.mc.databus_width=1; - sys.mc.addressbus_width=1; - sys.mc.memory_accesses=1; - sys.mc.memory_reads=1; - 
sys.mc.memory_writes=1; - sys.mc.LVDS=true; - sys.mc.type=1; - sys.mc.vdd =0; - sys.mc.power_gating_vcc = -1; - //system_niu - sys.niu.clockrate =1; - sys.niu.number_units=1; - sys.niu.type = 1; - sys.niu.vdd =0; - sys.niu.power_gating_vcc = -1; - sys.niu.duty_cycle =1; - sys.niu.total_load_perc=1; - //system_pcie - sys.pcie.clockrate =1; - sys.pcie.number_units=1; - sys.pcie.num_channels=1; - sys.pcie.type = 1; - sys.pcie.vdd =0; - sys.pcie.power_gating_vcc = -1; - sys.pcie.withPHY = false; - sys.pcie.duty_cycle =1; - sys.pcie.total_load_perc=1; - //system_flash_controller - sys.flashc.mc_clock =1; - sys.flashc.number_mcs=1; - sys.flashc.vdd =0; - sys.flashc.power_gating_vcc = -1; - sys.flashc.peak_transfer_rate =1; - sys.flashc.memory_channels_per_mc=1; - sys.flashc.number_ranks=1; - sys.flashc.req_window_size_per_channel=1; - sys.flashc.IO_buffer_size_per_channel=1; - sys.flashc.databus_width=1; - sys.flashc.addressbus_width=1; - sys.flashc.memory_accesses=1; - sys.flashc.memory_reads=1; - sys.flashc.memory_writes=1; - sys.flashc.LVDS=true; - sys.flashc.withPHY = false; - sys.flashc.type =1; - sys.flashc.duty_cycle =1; - sys.flashc.total_load_perc=1; + for (j = 0; j < 20; j++) + sys.L1Directory[i].Dir_config[j] = 1; + for (j = 0; j < 20; j++) + sys.L1Directory[i].buffer_sizes[j] = 1; + sys.L1Directory[i].clockrate = 1; + sys.L1Directory[i].ports[20] = 1; + sys.L1Directory[i].device_type = 1; + sys.L1Directory[i].vdd = 0; + sys.L1Directory[i].power_gating_vcc = -1; + strcpy(sys.L1Directory[i].threeD_stack, "default"); + sys.L1Directory[i].total_accesses = 1; + sys.L1Directory[i].read_accesses = 1; + sys.L1Directory[i].write_accesses = 1; + sys.L1Directory[i].duty_cycle = 1; + } + // system_L2directory + for (i = 0; i <= 63; i++) { + for (j = 0; j < 20; j++) + sys.L2Directory[i].Dir_config[j] = 1; + for (j = 0; j < 20; j++) + sys.L2Directory[i].buffer_sizes[j] = 1; + sys.L2Directory[i].clockrate = 1; + sys.L2Directory[i].ports[20] = 1; + 
sys.L2Directory[i].device_type = 1; + sys.L2Directory[i].vdd = 0; + sys.L2Directory[i].power_gating_vcc = -1; + strcpy(sys.L2Directory[i].threeD_stack, "default"); + sys.L2Directory[i].total_accesses = 1; + sys.L2Directory[i].read_accesses = 1; + sys.L2Directory[i].write_accesses = 1; + sys.L2Directory[i].duty_cycle = 1; + } + for (i = 0; i <= 63; i++) { + // system_L2 + for (j = 0; j < 20; j++) + sys.L2[i].L2_config[j] = 1; + sys.L2[i].clockrate = 1; + for (j = 0; j < 20; j++) + sys.L2[i].ports[j] = 1; + sys.L2[i].device_type = 1; + sys.L2[i].vdd = 0; + sys.L2[i].power_gating_vcc = -1; + strcpy(sys.L2[i].threeD_stack, "default"); + for (j = 0; j < 20; j++) + sys.L2[i].buffer_sizes[j] = 1; + sys.L2[i].total_accesses = 1; + sys.L2[i].read_accesses = 1; + sys.L2[i].write_accesses = 1; + sys.L2[i].total_hits = 1; + sys.L2[i].total_misses = 1; + sys.L2[i].read_hits = 1; + sys.L2[i].write_hits = 1; + sys.L2[i].read_misses = 1; + sys.L2[i].write_misses = 1; + sys.L2[i].replacements = 1; + sys.L2[i].write_backs = 1; + sys.L2[i].miss_buffer_accesses = 1; + sys.L2[i].fill_buffer_accesses = 1; + sys.L2[i].prefetch_buffer_accesses = 1; + sys.L2[i].prefetch_buffer_writes = 1; + sys.L2[i].prefetch_buffer_reads = 1; + sys.L2[i].prefetch_buffer_hits = 1; + sys.L2[i].wbb_writes = 1; + sys.L2[i].wbb_reads = 1; + sys.L2[i].duty_cycle = 1; + sys.L2[i].merged_dir = false; + sys.L2[i].homenode_read_accesses = 1; + sys.L2[i].homenode_write_accesses = 1; + sys.L2[i].homenode_read_hits = 1; + sys.L2[i].homenode_write_hits = 1; + sys.L2[i].homenode_read_misses = 1; + sys.L2[i].homenode_write_misses = 1; + sys.L2[i].dir_duty_cycle = 1; + } + for (i = 0; i <= 63; i++) { + // system_L3 + for (j = 0; j < 20; j++) + sys.L3[i].L3_config[j] = 1; + sys.L3[i].clockrate = 1; + for (j = 0; j < 20; j++) + sys.L3[i].ports[j] = 1; + sys.L3[i].device_type = 1; + sys.L3[i].vdd = 0; + sys.L2[i].power_gating_vcc = -1; + strcpy(sys.L3[i].threeD_stack, "default"); + for (j = 0; j < 20; j++) + 
sys.L3[i].buffer_sizes[j] = 1; + sys.L3[i].total_accesses = 1; + sys.L3[i].read_accesses = 1; + sys.L3[i].write_accesses = 1; + sys.L3[i].total_hits = 1; + sys.L3[i].total_misses = 1; + sys.L3[i].read_hits = 1; + sys.L3[i].write_hits = 1; + sys.L3[i].read_misses = 1; + sys.L3[i].write_misses = 1; + sys.L3[i].replacements = 1; + sys.L3[i].write_backs = 1; + sys.L3[i].miss_buffer_accesses = 1; + sys.L3[i].fill_buffer_accesses = 1; + sys.L3[i].prefetch_buffer_accesses = 1; + sys.L3[i].prefetch_buffer_writes = 1; + sys.L3[i].prefetch_buffer_reads = 1; + sys.L3[i].prefetch_buffer_hits = 1; + sys.L3[i].wbb_writes = 1; + sys.L3[i].wbb_reads = 1; + sys.L3[i].duty_cycle = 1; + sys.L3[i].merged_dir = false; + sys.L3[i].homenode_read_accesses = 1; + sys.L3[i].homenode_write_accesses = 1; + sys.L3[i].homenode_read_hits = 1; + sys.L3[i].homenode_write_hits = 1; + sys.L3[i].homenode_read_misses = 1; + sys.L3[i].homenode_write_misses = 1; + sys.L3[i].dir_duty_cycle = 1; + } + // system_NoC + for (i = 0; i <= 63; i++) { + sys.NoC[i].clockrate = 1; + sys.NoC[i].type = true; + sys.NoC[i].chip_coverage = 1; + sys.NoC[i].vdd = 0; + sys.NoC[i].power_gating_vcc = -1; + sys.NoC[i].has_global_link = true; + strcpy(sys.NoC[i].topology, "default"); + sys.NoC[i].horizontal_nodes = 1; + sys.NoC[i].vertical_nodes = 1; + sys.NoC[i].input_ports = 1; + sys.NoC[i].output_ports = 1; + sys.NoC[i].virtual_channel_per_port = 1; + sys.NoC[i].flit_bits = 1; + sys.NoC[i].input_buffer_entries_per_vc = 1; + sys.NoC[i].total_accesses = 1; + sys.NoC[i].duty_cycle = 1; + sys.NoC[i].route_over_perc = 0.5; + for (j = 0; j < 20; j++) + sys.NoC[i].ports_of_input_buffer[j] = 1; + sys.NoC[i].number_of_crossbars = 1; + strcpy(sys.NoC[i].crossbar_type, "default"); + strcpy(sys.NoC[i].crosspoint_type, "default"); + // system.NoC?.xbar0; + sys.NoC[i].xbar0.number_of_inputs_of_crossbars = 1; + sys.NoC[i].xbar0.number_of_outputs_of_crossbars = 1; + sys.NoC[i].xbar0.flit_bits = 1; + 
sys.NoC[i].xbar0.input_buffer_entries_per_port = 1; + sys.NoC[i].xbar0.ports_of_input_buffer[20] = 1; + sys.NoC[i].xbar0.crossbar_accesses = 1; + } + // system_mem + sys.mem.mem_tech_node = 1; + sys.mem.device_clock = 1; + sys.mem.capacity_per_channel = 1; + sys.mem.number_ranks = 1; + sys.mem.peak_transfer_rate = 1; + sys.mem.num_banks_of_DRAM_chip = 1; + sys.mem.Block_width_of_DRAM_chip = 1; + sys.mem.output_width_of_DRAM_chip = 1; + sys.mem.page_size_of_DRAM_chip = 1; + sys.mem.burstlength_of_DRAM_chip = 1; + sys.mem.internal_prefetch_of_DRAM_chip = 1; + sys.mem.memory_accesses = 1; + sys.mem.memory_reads = 1; + sys.mem.memory_writes = 1; + // system_mc + sys.mc.mc_clock = 1; + sys.mc.number_mcs = 1; + sys.mc.peak_transfer_rate = 1; + sys.mc.memory_channels_per_mc = 1; + sys.mc.number_ranks = 1; + sys.mc.req_window_size_per_channel = 1; + sys.mc.IO_buffer_size_per_channel = 1; + sys.mc.databus_width = 1; + sys.mc.addressbus_width = 1; + sys.mc.memory_accesses = 1; + sys.mc.memory_reads = 1; + sys.mc.memory_writes = 1; + sys.mc.LVDS = true; + sys.mc.type = 1; + sys.mc.vdd = 0; + sys.mc.power_gating_vcc = -1; + // system_niu + sys.niu.clockrate = 1; + sys.niu.number_units = 1; + sys.niu.type = 1; + sys.niu.vdd = 0; + sys.niu.power_gating_vcc = -1; + sys.niu.duty_cycle = 1; + sys.niu.total_load_perc = 1; + // system_pcie + sys.pcie.clockrate = 1; + sys.pcie.number_units = 1; + sys.pcie.num_channels = 1; + sys.pcie.type = 1; + sys.pcie.vdd = 0; + sys.pcie.power_gating_vcc = -1; + sys.pcie.withPHY = false; + sys.pcie.duty_cycle = 1; + sys.pcie.total_load_perc = 1; + // system_flash_controller + sys.flashc.mc_clock = 1; + sys.flashc.number_mcs = 1; + sys.flashc.vdd = 0; + sys.flashc.power_gating_vcc = -1; + sys.flashc.peak_transfer_rate = 1; + sys.flashc.memory_channels_per_mc = 1; + sys.flashc.number_ranks = 1; + sys.flashc.req_window_size_per_channel = 1; + sys.flashc.IO_buffer_size_per_channel = 1; + sys.flashc.databus_width = 1; + sys.flashc.addressbus_width = 1; 
+ sys.flashc.memory_accesses = 1; + sys.flashc.memory_reads = 1; + sys.flashc.memory_writes = 1; + sys.flashc.LVDS = true; + sys.flashc.withPHY = false; + sys.flashc.type = 1; + sys.flashc.duty_cycle = 1; + sys.flashc.total_load_perc = 1; } diff --git a/XML_Parse.h b/XML_Parse.h index a5ec883..9ae752e 100644 --- a/XML_Parse.h +++ b/XML_Parse.h @@ -32,581 +32,585 @@ #ifndef XML_PARSE_H_ #define XML_PARSE_H_ - //#ifdef WIN32 //#define _CRT_SECURE_NO_DEPRECATE //#endif -#include #include "xmlParser.h" -#include + #include +#include +#include using namespace std; /* void myfree(char *t); // {free(t);} ToXMLStringTool tx,tx2; */ -//all subnodes at the level of system.core(0-n) -//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate +// all subnodes at the level of system.core(0-n) +// cache_policy is added into cache property arrays;//0 no write or write-though +// with non-write allocate;1 write-back with write-allocate -typedef struct{ - int prediction_width; - char prediction_scheme[20]; - int predictor_size; - int predictor_entries; - int local_predictor_size[20]; - int local_predictor_entries; - int global_predictor_entries; - int global_predictor_bits; - int chooser_predictor_entries; - int chooser_predictor_bits; - double predictor_accesses; +typedef struct { + int prediction_width; + char prediction_scheme[20]; + int predictor_size; + int predictor_entries; + int local_predictor_size[20]; + int local_predictor_entries; + int global_predictor_entries; + int global_predictor_bits; + int chooser_predictor_entries; + int chooser_predictor_bits; + double predictor_accesses; } predictor_systemcore; -typedef struct{ - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - double total_hits; - double total_accesses; - double total_misses; - double conflicts; +typedef struct { + int number_entries; + int cache_policy; 
// 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + double total_hits; + double total_accesses; + double total_misses; + double conflicts; } itlb_systemcore; -typedef struct{ - //params - double icache_config[20]; - int buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double read_misses; - double replacements; - double read_hits; - double total_hits; - double total_misses; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double conflicts; +typedef struct { + // params + double icache_config[20]; + int buffer_sizes[20]; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + // stats + double total_accesses; + double read_accesses; + double read_misses; + double replacements; + double read_hits; + double total_hits; + double total_misses; + double miss_buffer_access; + double fill_buffer_accesses; + double prefetch_buffer_accesses; + double prefetch_buffer_writes; + double prefetch_buffer_reads; + double prefetch_buffer_hits; + double conflicts; } icache_systemcore; -typedef struct{ - //params - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double write_hits; - double read_hits; - double read_misses; - double write_misses; - double total_hits; - double total_misses; - double conflicts; +typedef struct { + // params + int number_entries; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + // stats + double total_accesses; + double read_accesses; + double write_accesses; + 
double write_hits; + double read_hits; + double read_misses; + double write_misses; + double total_hits; + double total_misses; + double conflicts; } dtlb_systemcore; -typedef struct{ - //params - double dcache_config[20]; - int buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; +typedef struct { + // params + double dcache_config[20]; + int buffer_sizes[20]; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + // stats + double total_accesses; + double read_accesses; + double write_accesses; + double total_hits; + double total_misses; + double read_hits; + double write_hits; + double read_misses; + double write_misses; + double replacements; + double write_backs; + double miss_buffer_access; + double fill_buffer_accesses; + double prefetch_buffer_accesses; + double prefetch_buffer_writes; + double prefetch_buffer_reads; + double prefetch_buffer_hits; + double wbb_writes; + double wbb_reads; + double conflicts; } dcache_systemcore; -typedef struct{ - //params - int BTB_config[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; +typedef struct { + // params + int BTB_config[20]; + // stats + double total_accesses; + double read_accesses; + double 
write_accesses; + double total_hits; + double total_misses; + double read_hits; + double write_hits; + double read_misses; + double write_misses; + double replacements; } BTB_systemcore; -typedef struct{ - //all params at the level of system.core(0-n) - int clock_rate; - bool opt_local; - bool x86; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int opcode_width; - int micro_opcode_width; - int instruction_length; - int machine_type; - int internal_datapath_width; - int number_hardware_threads; - int fetch_width; - int number_instruction_fetch_ports; - int decode_width; - int issue_width; - int peak_issue_width; - int commit_width; - int pipelines_per_core[20]; - int pipeline_depth[20]; - char FPU[20]; - char divider_multiplier[20]; - int ALU_per_core; - double FPU_per_core; - int MUL_per_core; - int instruction_buffer_size; - int decoded_stream_buffer_size; - int instruction_window_scheme; - int instruction_window_size; - int fp_instruction_window_size; - int ROB_size; - int archi_Regs_IRF_size; - int archi_Regs_FRF_size; - int phy_Regs_IRF_size; - int phy_Regs_FRF_size; - int rename_scheme; - int checkpoint_depth; - int register_windows_size; - char LSU_order[20]; - int store_buffer_size; - int load_buffer_size; - int memory_ports; - char Dcache_dual_pump[20]; - int RAS_size; - int fp_issue_width; - int prediction_width; - int number_of_BTB; - int number_of_BPT; +typedef struct { + // all params at the level of system.core(0-n) + int clock_rate; + bool opt_local; + bool x86; + int machine_bits; + int virtual_address_width; + int physical_address_width; + int opcode_width; + int micro_opcode_width; + int instruction_length; + int machine_type; + int internal_datapath_width; + int number_hardware_threads; + int fetch_width; + int number_instruction_fetch_ports; + int decode_width; + int issue_width; + int peak_issue_width; + int commit_width; + int pipelines_per_core[20]; + int pipeline_depth[20]; + char FPU[20]; + char 
divider_multiplier[20]; + int ALU_per_core; + double FPU_per_core; + int MUL_per_core; + int instruction_buffer_size; + int decoded_stream_buffer_size; + int instruction_window_scheme; + int instruction_window_size; + int fp_instruction_window_size; + int ROB_size; + int archi_Regs_IRF_size; + int archi_Regs_FRF_size; + int phy_Regs_IRF_size; + int phy_Regs_FRF_size; + int rename_scheme; + int checkpoint_depth; + int register_windows_size; + char LSU_order[20]; + int store_buffer_size; + int load_buffer_size; + int memory_ports; + char Dcache_dual_pump[20]; + int RAS_size; + int fp_issue_width; + int prediction_width; + int number_of_BTB; + int number_of_BPT; - //all stats at the level of system.core(0-n) - double total_instructions; - double int_instructions; - double fp_instructions; - double branch_instructions; - double branch_mispredictions; - double committed_instructions; - double committed_int_instructions; - double committed_fp_instructions; - double load_instructions; - double store_instructions; - double total_cycles; - double idle_cycles; - double busy_cycles; - double instruction_buffer_reads; - double instruction_buffer_write; - double ROB_reads; - double ROB_writes; - double rename_accesses; - double fp_rename_accesses; - double rename_reads; - double rename_writes; - double fp_rename_reads; - double fp_rename_writes; - double inst_window_reads; - double inst_window_writes; - double inst_window_wakeup_accesses; - double inst_window_selections; - double fp_inst_window_reads; - double fp_inst_window_writes; - double fp_inst_window_wakeup_accesses; - double fp_inst_window_selections; - double archi_int_regfile_reads; - double archi_float_regfile_reads; - double phy_int_regfile_reads; - double phy_float_regfile_reads; - double phy_int_regfile_writes; - double phy_float_regfile_writes; - double archi_int_regfile_writes; - double archi_float_regfile_writes; - double int_regfile_reads; - double float_regfile_reads; - double int_regfile_writes; - double 
float_regfile_writes; - double windowed_reg_accesses; - double windowed_reg_transports; - double function_calls; - double context_switches; - double ialu_accesses; - double fpu_accesses; - double mul_accesses; - double cdb_alu_accesses; - double cdb_mul_accesses; - double cdb_fpu_accesses; - double load_buffer_reads; - double load_buffer_writes; - double load_buffer_cams; - double store_buffer_reads; - double store_buffer_writes; - double store_buffer_cams; - double store_buffer_forwards; - double main_memory_access; - double main_memory_read; - double main_memory_write; - double pipeline_duty_cycle; + // all stats at the level of system.core(0-n) + double total_instructions; + double int_instructions; + double fp_instructions; + double branch_instructions; + double branch_mispredictions; + double committed_instructions; + double committed_int_instructions; + double committed_fp_instructions; + double load_instructions; + double store_instructions; + double total_cycles; + double idle_cycles; + double busy_cycles; + double instruction_buffer_reads; + double instruction_buffer_write; + double ROB_reads; + double ROB_writes; + double rename_accesses; + double fp_rename_accesses; + double rename_reads; + double rename_writes; + double fp_rename_reads; + double fp_rename_writes; + double inst_window_reads; + double inst_window_writes; + double inst_window_wakeup_accesses; + double inst_window_selections; + double fp_inst_window_reads; + double fp_inst_window_writes; + double fp_inst_window_wakeup_accesses; + double fp_inst_window_selections; + double archi_int_regfile_reads; + double archi_float_regfile_reads; + double phy_int_regfile_reads; + double phy_float_regfile_reads; + double phy_int_regfile_writes; + double phy_float_regfile_writes; + double archi_int_regfile_writes; + double archi_float_regfile_writes; + double int_regfile_reads; + double float_regfile_reads; + double int_regfile_writes; + double float_regfile_writes; + double windowed_reg_accesses; + double 
windowed_reg_transports; + double function_calls; + double context_switches; + double ialu_accesses; + double fpu_accesses; + double mul_accesses; + double cdb_alu_accesses; + double cdb_mul_accesses; + double cdb_fpu_accesses; + double load_buffer_reads; + double load_buffer_writes; + double load_buffer_cams; + double store_buffer_reads; + double store_buffer_writes; + double store_buffer_cams; + double store_buffer_forwards; + double main_memory_access; + double main_memory_read; + double main_memory_write; + double pipeline_duty_cycle; - double IFU_duty_cycle ; - double BR_duty_cycle ; - double LSU_duty_cycle ; - double MemManU_I_duty_cycle; - double MemManU_D_duty_cycle ; - double ALU_duty_cycle ; - double MUL_duty_cycle ; - double FPU_duty_cycle ; - double ALU_cdb_duty_cycle ; - double MUL_cdb_duty_cycle ; - double FPU_cdb_duty_cycle ; + double IFU_duty_cycle; + double BR_duty_cycle; + double LSU_duty_cycle; + double MemManU_I_duty_cycle; + double MemManU_D_duty_cycle; + double ALU_duty_cycle; + double MUL_duty_cycle; + double FPU_duty_cycle; + double ALU_cdb_duty_cycle; + double MUL_cdb_duty_cycle; + double FPU_cdb_duty_cycle; - double vdd; - double power_gating_vcc; + double vdd; + double power_gating_vcc; - //all subnodes at the level of system.core(0-n) - predictor_systemcore predictor; - itlb_systemcore itlb; - icache_systemcore icache; - dtlb_systemcore dtlb; - dcache_systemcore dcache; - BTB_systemcore BTB; + // all subnodes at the level of system.core(0-n) + predictor_systemcore predictor; + itlb_systemcore itlb; + icache_systemcore icache; + dtlb_systemcore dtlb; + dcache_systemcore dcache; + BTB_systemcore BTB; } system_core; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - double vdd; - double power_gating_vcc; - 
//stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; +typedef struct { + // params + int Directory_type; + double Dir_config[20]; + int buffer_sizes[20]; + int clockrate; + int ports[20]; + int device_type; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + char threeD_stack[20]; + double vdd; + double power_gating_vcc; + // stats + double total_accesses; + double read_accesses; + double write_accesses; + double read_misses; + double write_misses; + double conflicts; + double duty_cycle; } system_L1Directory; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - double vdd; - double power_gating_vcc; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; +typedef struct { + // params + int Directory_type; + double Dir_config[20]; + int buffer_sizes[20]; + int clockrate; + int ports[20]; + int device_type; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + char threeD_stack[20]; + double vdd; + double power_gating_vcc; + // stats + double total_accesses; + double read_accesses; + double write_accesses; + double read_misses; + double write_misses; + double conflicts; + double duty_cycle; } system_L2Directory; -typedef struct{ - //params - double L2_config[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - double vdd; - double 
power_gating_vcc; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; +typedef struct { + // params + double L2_config[20]; + int clockrate; + int ports[20]; + int device_type; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + char threeD_stack[20]; + int buffer_sizes[20]; + double vdd; + double power_gating_vcc; + // stats + double total_accesses; + double read_accesses; + double write_accesses; + double total_hits; + double total_misses; + double read_hits; + double write_hits; + double read_misses; + double write_misses; + double replacements; + double write_backs; + double miss_buffer_accesses; + double fill_buffer_accesses; + double prefetch_buffer_accesses; + double prefetch_buffer_writes; + double prefetch_buffer_reads; + double prefetch_buffer_hits; + double wbb_writes; + double wbb_reads; + double conflicts; + double duty_cycle; - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; + bool merged_dir; + double homenode_read_accesses; + double homenode_write_accesses; + double homenode_read_hits; + double homenode_write_hits; + double homenode_read_misses; + double homenode_write_misses; + double dir_duty_cycle; } system_L2; -typedef struct{ - //params - double L3_config[20]; - int clockrate; - int ports[20]; - int device_type; - int 
cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - double vdd; - double power_gating_vcc; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; +typedef struct { + // params + double L3_config[20]; + int clockrate; + int ports[20]; + int device_type; + int cache_policy; // 0 no write or write-though with non-write allocate;1 + // write-back with write-allocate + char threeD_stack[20]; + int buffer_sizes[20]; + double vdd; + double power_gating_vcc; + // stats + double total_accesses; + double read_accesses; + double write_accesses; + double total_hits; + double total_misses; + double read_hits; + double write_hits; + double read_misses; + double write_misses; + double replacements; + double write_backs; + double miss_buffer_accesses; + double fill_buffer_accesses; + double prefetch_buffer_accesses; + double prefetch_buffer_writes; + double prefetch_buffer_reads; + double prefetch_buffer_hits; + double wbb_writes; + double wbb_reads; + double conflicts; + double duty_cycle; - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; + bool merged_dir; + double homenode_read_accesses; + double homenode_write_accesses; + double homenode_read_hits; + double homenode_write_hits; + double homenode_read_misses; + double 
homenode_write_misses; + double dir_duty_cycle; } system_L3; -typedef struct{ - //params - int number_of_inputs_of_crossbars; - int number_of_outputs_of_crossbars; - int flit_bits; - int input_buffer_entries_per_port; - int ports_of_input_buffer[20]; - //stats - double crossbar_accesses; +typedef struct { + // params + int number_of_inputs_of_crossbars; + int number_of_outputs_of_crossbars; + int flit_bits; + int input_buffer_entries_per_port; + int ports_of_input_buffer[20]; + // stats + double crossbar_accesses; } xbar0_systemNoC; -typedef struct{ - //params - int clockrate; - bool type; - bool has_global_link; - char topology[20]; - int horizontal_nodes; - int vertical_nodes; - int link_throughput; - int link_latency; - int input_ports; - int output_ports; - int virtual_channel_per_port; - int flit_bits; - int input_buffer_entries_per_vc; - int ports_of_input_buffer[20]; - int dual_pump; - int number_of_crossbars; - char crossbar_type[20]; - char crosspoint_type[20]; - xbar0_systemNoC xbar0; - int arbiter_type; - double chip_coverage; - double vdd; - double power_gating_vcc; - //stats - double total_accesses; - double duty_cycle; - double route_over_perc; +typedef struct { + // params + int clockrate; + bool type; + bool has_global_link; + char topology[20]; + int horizontal_nodes; + int vertical_nodes; + int link_throughput; + int link_latency; + int input_ports; + int output_ports; + int virtual_channel_per_port; + int flit_bits; + int input_buffer_entries_per_vc; + int ports_of_input_buffer[20]; + int dual_pump; + int number_of_crossbars; + char crossbar_type[20]; + char crosspoint_type[20]; + xbar0_systemNoC xbar0; + int arbiter_type; + double chip_coverage; + double vdd; + double power_gating_vcc; + // stats + double total_accesses; + double duty_cycle; + double route_over_perc; } system_NoC; -typedef struct{ - //params - int mem_tech_node; - int device_clock; - int peak_transfer_rate; - int internal_prefetch_of_DRAM_chip; - int capacity_per_channel; - int 
number_ranks; - int num_banks_of_DRAM_chip; - int Block_width_of_DRAM_chip; - int output_width_of_DRAM_chip; - int page_size_of_DRAM_chip; - int burstlength_of_DRAM_chip; - //stats - double memory_accesses; - double memory_reads; - double memory_writes; +typedef struct { + // params + int mem_tech_node; + int device_clock; + int peak_transfer_rate; + int internal_prefetch_of_DRAM_chip; + int capacity_per_channel; + int number_ranks; + int num_banks_of_DRAM_chip; + int Block_width_of_DRAM_chip; + int output_width_of_DRAM_chip; + int page_size_of_DRAM_chip; + int burstlength_of_DRAM_chip; + // stats + double memory_accesses; + double memory_reads; + double memory_writes; } system_mem; -typedef struct{ - //params - //Common Param for mc and fc - double peak_transfer_rate; - int number_mcs; - bool withPHY; - int type; +typedef struct { + // params + // Common Param for mc and fc + double peak_transfer_rate; + int number_mcs; + bool withPHY; + int type; - //FCParam - //stats - double duty_cycle; - double total_load_perc; + // FCParam + // stats + double duty_cycle; + double total_load_perc; - //McParam - int mc_clock; - int llc_line_length; - int memory_channels_per_mc; - int number_ranks; - int req_window_size_per_channel; - int IO_buffer_size_per_channel; - int databus_width; - int addressbus_width; - bool LVDS; - double vdd; - double power_gating_vcc; + // McParam + int mc_clock; + int llc_line_length; + int memory_channels_per_mc; + int number_ranks; + int req_window_size_per_channel; + int IO_buffer_size_per_channel; + int databus_width; + int addressbus_width; + bool LVDS; + double vdd; + double power_gating_vcc; - //stats - double memory_accesses; - double memory_reads; - double memory_writes; + // stats + double memory_accesses; + double memory_reads; + double memory_writes; } system_mc; -typedef struct{ - //params - int clockrate; - int number_units; - int type; - double vdd; - double power_gating_vcc; - //stats - double duty_cycle; - double total_load_perc; 
+typedef struct { + // params + int clockrate; + int number_units; + int type; + double vdd; + double power_gating_vcc; + // stats + double duty_cycle; + double total_load_perc; } system_niu; -typedef struct{ - //params - int clockrate; - int number_units; - int num_channels; - int type; - bool withPHY; - double vdd; - double power_gating_vcc; - //stats - double duty_cycle; - double total_load_perc; +typedef struct { + // params + int clockrate; + int number_units; + int num_channels; + int type; + bool withPHY; + double vdd; + double power_gating_vcc; + // stats + double duty_cycle; + double total_load_perc; } system_pcie; -typedef struct{ - //All number_of_* at the level of 'system' Ying 03/21/2009 - int number_of_cores; - int number_of_L1Directories; - int number_of_L2Directories; - int number_of_L2s; - bool Private_L2; - int number_of_L3s; - int number_of_NoCs; - int number_of_dir_levels; - int domain_size; - int first_level_dir; - // All params at the level of 'system' - int homogeneous_cores; - int homogeneous_L1Directories; - int homogeneous_L2Directories; - double core_tech_node; - int target_core_clockrate; - int target_chip_area; - int temperature; - int number_cache_levels; - int L1_property; - int L2_property; - int homogeneous_L2s; - int L3_property; - int homogeneous_L3s; - int homogeneous_NoCs; - int homogeneous_ccs; - int Max_area_deviation; - int Max_power_deviation; - int device_type; - bool longer_channel_device; - bool power_gating; - bool Embedded; - bool opt_dynamic_power; - bool opt_lakage_power; - bool opt_clockrate; - bool opt_area; - int interconnect_projection_type; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int virtual_memory_page_size; - double total_cycles; - double vdd; - double power_gating_vcc; - //system.core(0-n):3rd level - system_core core[64]; - system_L1Directory L1Directory[64]; - system_L2Directory L2Directory[64]; - system_L2 L2[64]; - system_L3 L3[64]; - system_NoC NoC[64]; - 
system_mem mem; - system_mc mc; - system_mc flashc; - system_niu niu; - system_pcie pcie; +typedef struct { + // All number_of_* at the level of 'system' Ying 03/21/2009 + int number_of_cores; + int number_of_L1Directories; + int number_of_L2Directories; + int number_of_L2s; + bool Private_L2; + int number_of_L3s; + int number_of_NoCs; + int number_of_dir_levels; + int domain_size; + int first_level_dir; + // All params at the level of 'system' + int homogeneous_cores; + int homogeneous_L1Directories; + int homogeneous_L2Directories; + double core_tech_node; + int target_core_clockrate; + int target_chip_area; + int temperature; + int number_cache_levels; + int L1_property; + int L2_property; + int homogeneous_L2s; + int L3_property; + int homogeneous_L3s; + int homogeneous_NoCs; + int homogeneous_ccs; + int Max_area_deviation; + int Max_power_deviation; + int device_type; + bool longer_channel_device; + bool power_gating; + bool Embedded; + bool opt_dynamic_power; + bool opt_lakage_power; + bool opt_clockrate; + bool opt_area; + int interconnect_projection_type; + int machine_bits; + int virtual_address_width; + int physical_address_width; + int virtual_memory_page_size; + double total_cycles; + double vdd; + double power_gating_vcc; + // system.core(0-n):3rd level + system_core core[64]; + system_L1Directory L1Directory[64]; + system_L2Directory L2Directory[64]; + system_L2 L2[64]; + system_L3 L3[64]; + system_NoC NoC[64]; + system_mem mem; + system_mc mc; + system_mc flashc; + system_niu niu; + system_pcie pcie; } root_system; -class ParseXML -{ +class ParseXML { public: - void parse(char* filepath); - void initialize(); + void parse(char *filepath); + void initialize(); + public: - root_system sys; + root_system sys; }; - #endif /* XML_PARSE_H_ */ - - - - diff --git a/arch_const.h b/arch_const.h index feffa6c..0c82248 100644 --- a/arch_const.h +++ b/arch_const.h @@ -32,202 +32,189 @@ #ifndef ARCH_CONST_H_ #define ARCH_CONST_H_ -typedef struct{ - unsigned int 
capacity; - unsigned int assoc;//fully - unsigned int blocksize; +typedef struct { + unsigned int capacity; + unsigned int assoc; // fully + unsigned int blocksize; } array_inputs; -//Do Not change, unless you want to bypass the XML interface and do not care about the default values. -//Global parameters -const int number_of_cores = 8; -const int number_of_L2s = 1; -const int number_of_L3s = 1; -const int number_of_NoCs = 1; - -const double archi_F_sz_nm = 90.0; -const unsigned int dev_type = 0; -const double CLOCKRATE = 1.2*1e9; -const double AF = 0.5; -//const bool inorder = true; -const bool embedded = false; //NEW - -const bool homogeneous_cores = true; -const bool temperature = 360; -const int number_cache_levels = 3; -const int L1_property = 0; //private 0; coherent 1, shared 2. -const int L2_property = 2; -const bool homogeneous_L2s = true; -const bool L3_property = 2; -const bool homogeneous_L3s = true; -const double Max_area_deviation = 50; -const double Max_dynamic_deviation =50; //New -const int opt_dynamic_power = 1; -const int opt_lakage_power = 0; -const int opt_area = 0; -const int interconnect_projection_type = 0; +// Do Not change, unless you want to bypass the XML interface and do not care +// about the default values. Global parameters +const int number_of_cores = 8; +const int number_of_L2s = 1; +const int number_of_L3s = 1; +const int number_of_NoCs = 1; + +const double archi_F_sz_nm = 90.0; +const unsigned int dev_type = 0; +const double CLOCKRATE = 1.2 * 1e9; +const double AF = 0.5; +// const bool inorder = true; +const bool embedded = false; // NEW + +const bool homogeneous_cores = true; +const bool temperature = 360; +const int number_cache_levels = 3; +const int L1_property = 0; // private 0; coherent 1, shared 2. 
+const int L2_property = 2; +const bool homogeneous_L2s = true; +const bool L3_property = 2; +const bool homogeneous_L3s = true; +const double Max_area_deviation = 50; +const double Max_dynamic_deviation = 50; // New +const int opt_dynamic_power = 1; +const int opt_lakage_power = 0; +const int opt_area = 0; +const int interconnect_projection_type = 0; //******************************Core Parameters #if (inorder) -const int opcode_length = 8;//Niagara -const int reg_length = 5;//Niagara -const int instruction_length = 32;//Niagara -const int data_width = 64; +const int opcode_length = 8; // Niagara +const int reg_length = 5; // Niagara +const int instruction_length = 32; // Niagara +const int data_width = 64; #else -const int opcode_length = 8;//16;//Niagara -const int reg_length = 7;//Niagara -const int instruction_length = 32;//Niagara -const int data_width = 64; +const int opcode_length = 8; // 16;//Niagara +const int reg_length = 7; // Niagara +const int instruction_length = 32; // Niagara +const int data_width = 64; #endif - -//Caches -//itlb -const int itlbsize=512; -const int itlbassoc=0;//fully -const int itlbblocksize=8; -//icache -const int icachesize=32768; -const int icacheassoc=4; -const int icacheblocksize=32; -//dtlb -const int dtlbsize=512; -const int dtlbassoc=0;//fully -const int dtlbblocksize=8; -//dcache -const int dcachesize=32768; -const int dcacheassoc=4; -const int dcacheblocksize=32; -const int dcache_write_buffers=8; - -//cache controllers -//IB, -const int numIBEntries = 64; -const int IBsize = 64;//2*4*instruction_length/8*2; -const int IBassoc = 0;//In Niagara it is still fully associ -const int IBblocksize = 4; - -//IFB and MIL should have the same parameters CAM -const int IFBsize=128;// -const int IFBassoc=0;//In Niagara it is still fully associ -const int IFBblocksize=4; - - - - -const int icache_write_buffers=8; - -//register file RAM -const int regfilesize=5760; -const int regfileassoc=1; -const int regfileblocksize=18; -//regwin 
RAM -const int regwinsize=256; -const int regwinassoc=1; -const int regwinblocksize=8; - - - -//store buffer, lsq -const int lsqsize=512; -const int lsqassoc=0; -const int lsqblocksize=8; - -//data fill queue RAM -const int dfqsize=1024; -const int dfqassoc=1; -const int dfqblocksize=16; - -//outside the cores -//L2 cache bank -const int l2cachesize=262144; -const int l2cacheassoc=16; -const int l2cacheblocksize=64; - -//L2 directory -const int l2dirsize=1024; -const int l2dirassoc=0; -const int l2dirblocksize=2; - -//crossbar -//PCX +// Caches +// itlb +const int itlbsize = 512; +const int itlbassoc = 0; // fully +const int itlbblocksize = 8; +// icache +const int icachesize = 32768; +const int icacheassoc = 4; +const int icacheblocksize = 32; +// dtlb +const int dtlbsize = 512; +const int dtlbassoc = 0; // fully +const int dtlbblocksize = 8; +// dcache +const int dcachesize = 32768; +const int dcacheassoc = 4; +const int dcacheblocksize = 32; +const int dcache_write_buffers = 8; + +// cache controllers +// IB, +const int numIBEntries = 64; +const int IBsize = 64; // 2*4*instruction_length/8*2; +const int IBassoc = 0; // In Niagara it is still fully associ +const int IBblocksize = 4; + +// IFB and MIL should have the same parameters CAM +const int IFBsize = 128; // +const int IFBassoc = 0; // In Niagara it is still fully associ +const int IFBblocksize = 4; + +const int icache_write_buffers = 8; + +// register file RAM +const int regfilesize = 5760; +const int regfileassoc = 1; +const int regfileblocksize = 18; +// regwin RAM +const int regwinsize = 256; +const int regwinassoc = 1; +const int regwinblocksize = 8; + +// store buffer, lsq +const int lsqsize = 512; +const int lsqassoc = 0; +const int lsqblocksize = 8; + +// data fill queue RAM +const int dfqsize = 1024; +const int dfqassoc = 1; +const int dfqblocksize = 16; + +// outside the cores +// L2 cache bank +const int l2cachesize = 262144; +const int l2cacheassoc = 16; +const int l2cacheblocksize = 64; + +// 
L2 directory +const int l2dirsize = 1024; +const int l2dirassoc = 0; +const int l2dirblocksize = 2; + +// crossbar +// PCX const int PCX_NUMBER_INPUT_PORTS_CROSSBAR = 8; const int PCX_NUMBER_OUTPUT_PORTS_CROSSBAR = 9; -const int PCX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =144; -//PCX buffer RAM -const int pcx_buffersize=1024; -const int pcx_bufferassoc=1; -const int pcx_bufferblocksize=32; -const int pcx_numbuffer=5; -//pcx arbiter -const int pcx_arbsize=128; -const int pcx_arbassoc=1; -const int pcx_arbblocksize=2; -const int pcx_numarb=5; - -//CPX +const int PCX_NUMBER_SIGNALS_PER_PORT_CROSSBAR = 144; +// PCX buffer RAM +const int pcx_buffersize = 1024; +const int pcx_bufferassoc = 1; +const int pcx_bufferblocksize = 32; +const int pcx_numbuffer = 5; +// pcx arbiter +const int pcx_arbsize = 128; +const int pcx_arbassoc = 1; +const int pcx_arbblocksize = 2; +const int pcx_numarb = 5; + +// CPX const int CPX_NUMBER_INPUT_PORTS_CROSSBAR = 5; const int CPX_NUMBER_OUTPUT_PORTS_CROSSBAR = 8; -const int CPX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =150; -//CPX buffer RAM -const int cpx_buffersize=1024; -const int cpx_bufferassoc=1; -const int cpx_bufferblocksize=32; -const int cpx_numbuffer=8; -//cpx arbiter -const int cpx_arbsize=128; -const int cpx_arbassoc=1; -const int cpx_arbblocksize=2; -const int cpx_numarb=8; - - - - - -const int numPhysFloatRegs=256; -const int numPhysIntRegs=32; -const int numROBEntries=192; -const int umRobs=1; - -const int BTBEntries=4096; -const int BTBTagSize=16; -const int LFSTSize=1024; -const int LQEntries=32; -const int RASSize=16; -const int SQEntries=32; -const int SSITSize=1024; -const int activity=0; -const int backComSize=5; -const int cachePorts=200; -const int choiceCtrBits=2; -const int choicePredictorSize=8192; - - -const int commitWidth=8; -const int decodeWidth=8; -const int dispatchWidth=8; -const int fetchWidth=8; -const int issueWidth=1; -const int renameWidth=8; -//what is this forwardComSize=5?? 
- -const int globalCtrBits=2; -const int globalHistoryBits=13; -const int globalPredictorSize=8192; - - - -const int localCtrBits=2; -const int localHistoryBits=11; -const int localHistoryTableSize=2048; -const int localPredictorSize=2048; - -const double Woutdrvnandn =30 *0.09;//(24.0 * LSCALE) -const double Woutdrvnandp =12.5 *0.09;//(10.0 * LSCALE) -const double Woutdrvnorn =7.5*0.09;//(6.0 * LSCALE) -const double Woutdrvnorp =50 * 0.09;// (40.0 * LSCALE) -const double Woutdrivern =60*0.09;//(48.0 * LSCALE) -const double Woutdriverp =100 * 0.09;//(80.0 * LSCALE) +const int CPX_NUMBER_SIGNALS_PER_PORT_CROSSBAR = 150; +// CPX buffer RAM +const int cpx_buffersize = 1024; +const int cpx_bufferassoc = 1; +const int cpx_bufferblocksize = 32; +const int cpx_numbuffer = 8; +// cpx arbiter +const int cpx_arbsize = 128; +const int cpx_arbassoc = 1; +const int cpx_arbblocksize = 2; +const int cpx_numarb = 8; + +const int numPhysFloatRegs = 256; +const int numPhysIntRegs = 32; +const int numROBEntries = 192; +const int umRobs = 1; + +const int BTBEntries = 4096; +const int BTBTagSize = 16; +const int LFSTSize = 1024; +const int LQEntries = 32; +const int RASSize = 16; +const int SQEntries = 32; +const int SSITSize = 1024; +const int activity = 0; +const int backComSize = 5; +const int cachePorts = 200; +const int choiceCtrBits = 2; +const int choicePredictorSize = 8192; + +const int commitWidth = 8; +const int decodeWidth = 8; +const int dispatchWidth = 8; +const int fetchWidth = 8; +const int issueWidth = 1; +const int renameWidth = 8; +// what is this forwardComSize=5?? 
+ +const int globalCtrBits = 2; +const int globalHistoryBits = 13; +const int globalPredictorSize = 8192; + +const int localCtrBits = 2; +const int localHistoryBits = 11; +const int localHistoryTableSize = 2048; +const int localPredictorSize = 2048; + +const double Woutdrvnandn = 30 * 0.09; //(24.0 * LSCALE) +const double Woutdrvnandp = 12.5 * 0.09; //(10.0 * LSCALE) +const double Woutdrvnorn = 7.5 * 0.09; //(6.0 * LSCALE) +const double Woutdrvnorp = 50 * 0.09; // (40.0 * LSCALE) +const double Woutdrivern = 60 * 0.09; //(48.0 * LSCALE) +const double Woutdriverp = 100 * 0.09; //(80.0 * LSCALE) /* smtCommitPolicy=RoundRobin @@ -270,7 +257,6 @@ mem_side=system.tol2bus.port[2] */ //[system.cpu0.dtb] -//type=AlphaDT - +// type=AlphaDT #endif /* ARCH_CONST_H_ */ diff --git a/array.cc b/array.cc index 85e617b..86f60d6 100644 --- a/array.cc +++ b/array.cc @@ -29,315 +29,342 @@ * ***************************************************************************/ -#define GLOBALVAR +#define GLOBALVAR +#include "array.h" + #include "area.h" #include "decoder.h" +#include "globalvar.h" #include "parameter.h" -#include "array.h" + +#include #include #include -#include -#include "globalvar.h" using namespace std; -ArrayST::ArrayST(const InputParameter *configure_interface, - string _name, - enum Device_ty device_ty_, - bool opt_local_, - enum Core_type core_ty_, - bool _is_default) -:l_ip(*configure_interface), - name(_name), - device_ty(device_ty_), - opt_local(opt_local_), - core_ty(core_ty_), - is_default(_is_default) - { +ArrayST::ArrayST(const InputParameter *configure_interface, string _name, + enum Device_ty device_ty_, bool opt_local_, + enum Core_type core_ty_, bool _is_default) + : l_ip(*configure_interface), name(_name), device_ty(device_ty_), + opt_local(opt_local_), core_ty(core_ty_), is_default(_is_default) { - if (l_ip.cache_sz<64) l_ip.cache_sz=64; - if (l_ip.power_gating && (l_ip.assoc==0)) {l_ip.power_gating = false;} - l_ip.error_checking();//not only do the error 
checking but also fill some missing parameters - optimize_array(); + if (l_ip.cache_sz < 64) + l_ip.cache_sz = 64; + if (l_ip.power_gating && (l_ip.assoc == 0)) { + l_ip.power_gating = false; + } + l_ip.error_checking(); // not only do the error checking but also fill some + // missing parameters + optimize_array(); +} +void ArrayST::compute_base_power() { + // l_ip.out_w =l_ip.line_sz*8; + local_result = cacti_interface(&l_ip); + assert(local_result.cycle_time > 0); + assert(local_result.access_time > 0); + // if (name == "Int FrontRAT") + // { + // cout< (candidate_iter)->power.readOp.dynamic) - { - min_dynamic_energy = (candidate_iter)->power.readOp.dynamic; - min_dynamic_energy_iter = candidate_iter; - local_result = *(min_dynamic_energy_iter); - //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match. - - } - else - { - candidate_iter->cleanup() ; - } - - } - - - } - candidate_solutions.clear(); - } - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - double pg_reduction = power_gating_leakage_reduction(false);//array structure all retain state; - - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - double total_overhead = macro_layout_overhead*chip_PR_overhead; - local_result.area *= total_overhead; - - //maintain constant power density - double pppm_t[4] = {total_overhead,1,1,total_overhead}; - - double sckRation = g_tp.sckt_co_eff; - local_result.power.readOp.dynamic *= sckRation; - local_result.power.writeOp.dynamic *= sckRation; - local_result.power.searchOp.dynamic *= sckRation; - local_result.power.readOp.leakage *= l_ip.nbanks; - local_result.power.readOp.longer_channel_leakage = - local_result.power.readOp.leakage*long_channel_device_reduction; - - if (l_ip.assoc==0)//only use this function for CAM/FA since other array types compute pg leakage automatically - { - 
local_result.power.readOp.power_gated_leakage = - local_result.power.readOp.leakage*pg_reduction; - } - else - { - local_result.power.readOp.power_gated_leakage *= l_ip.nbanks;//normal array types - } - - local_result.power.readOp.power_gated_with_long_channel_leakage = local_result.power.readOp.power_gated_leakage * long_channel_device_reduction;//power-gating atop long channel - - local_result.power = local_result.power* pppm_t; - - - local_result.data_array2->power.readOp.dynamic *= sckRation; - local_result.data_array2->power.writeOp.dynamic *= sckRation; - local_result.data_array2->power.searchOp.dynamic *= sckRation; - local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.data_array2->power.readOp.longer_channel_leakage = - local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; - if (l_ip.assoc==0)//only use this function for CAM/FA since other array types compute pg leakage automatically - { - local_result.data_array2->power.readOp.power_gated_leakage = - local_result.data_array2->power.readOp.leakage*pg_reduction; - } - else - { - local_result.data_array2->power.readOp.power_gated_leakage *= l_ip.nbanks;//normal array types - } - local_result.data_array2->power.readOp.power_gated_with_long_channel_leakage = local_result.data_array2->power.readOp.power_gated_leakage * long_channel_device_reduction; - - local_result.data_array2->power = local_result.data_array2->power* pppm_t; - - - if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) - { - local_result.tag_array2->power.readOp.dynamic *= sckRation; - local_result.tag_array2->power.writeOp.dynamic *= sckRation; - local_result.tag_array2->power.searchOp.dynamic *= sckRation; - local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.tag_array2->power.readOp.power_gated_leakage *= l_ip.nbanks; - local_result.tag_array2->power.readOp.longer_channel_leakage = - 
local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; - - local_result.tag_array2->power.readOp.power_gated_with_long_channel_leakage = - local_result.tag_array2->power.readOp.power_gated_leakage*long_channel_device_reduction; - local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; - } + if (l_ip.assoc > 0) { + // For array structures except CAM and FA, Give warning but still provide + // a result with best timing found + if (throughput_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy throughput constraint." + << endl; + if (latency_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy latency constraint." << endl; + } + // else + // { + // /*According to "Content-Addressable Memory (CAM) Circuits and + // Architectures": A Tutorial and Survey + // by Kostas Pagiamtzis et al. + // CAM structures can be heavily pipelined and use + // look-ahead techniques, therefore timing can be + // relaxed. But McPAT does not model the + // advanced techniques. If continue optimizing, the area efficiency + // will be too low + // */ + // //For CAM and FA, stop opt if area efficiency is too low + // if (throughput_overflow==true) + // cout<< "Warning: " <<" McPAT stopped optimization on throughput + // for + //"<< name + // <<" array structure because its area efficiency is + // below + //"< (candidate_iter)->power.readOp.dynamic) { + min_dynamic_energy = (candidate_iter)->power.readOp.dynamic; + min_dynamic_energy_iter = candidate_iter; + local_result = *(min_dynamic_energy_iter); + // TODO: since results are reordered results and l_ip may miss match. + // Therefore, the final output spread sheets may show the miss match. 
+ + } else { + candidate_iter->cleanup(); + } + } + } + candidate_solutions.clear(); + } + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + double pg_reduction = power_gating_leakage_reduction( + false); // array structure all retain state; + + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + double total_overhead = macro_layout_overhead * chip_PR_overhead; + local_result.area *= total_overhead; + // maintain constant power density + double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; + + double sckRation = g_tp.sckt_co_eff; + local_result.power.readOp.dynamic *= sckRation; + local_result.power.writeOp.dynamic *= sckRation; + local_result.power.searchOp.dynamic *= sckRation; + local_result.power.readOp.leakage *= l_ip.nbanks; + local_result.power.readOp.longer_channel_leakage = + local_result.power.readOp.leakage * long_channel_device_reduction; + + if (l_ip.assoc == 0) // only use this function for CAM/FA since other array + // types compute pg leakage automatically + { + local_result.power.readOp.power_gated_leakage = + local_result.power.readOp.leakage * pg_reduction; + } else { + local_result.power.readOp.power_gated_leakage *= + l_ip.nbanks; // normal array types + } + + local_result.power.readOp.power_gated_with_long_channel_leakage = + local_result.power.readOp.power_gated_leakage * + long_channel_device_reduction; // power-gating atop long channel + + local_result.power = local_result.power * pppm_t; + + local_result.data_array2->power.readOp.dynamic *= sckRation; + local_result.data_array2->power.writeOp.dynamic *= sckRation; + local_result.data_array2->power.searchOp.dynamic *= sckRation; + local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.data_array2->power.readOp.longer_channel_leakage = + local_result.data_array2->power.readOp.leakage * + long_channel_device_reduction; + if (l_ip.assoc == 0) // only use 
this function for CAM/FA since other array + // types compute pg leakage automatically + { + local_result.data_array2->power.readOp.power_gated_leakage = + local_result.data_array2->power.readOp.leakage * pg_reduction; + } else { + local_result.data_array2->power.readOp.power_gated_leakage *= + l_ip.nbanks; // normal array types + } + local_result.data_array2->power.readOp.power_gated_with_long_channel_leakage = + local_result.data_array2->power.readOp.power_gated_leakage * + long_channel_device_reduction; + + local_result.data_array2->power = local_result.data_array2->power * pppm_t; + + if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { + local_result.tag_array2->power.readOp.dynamic *= sckRation; + local_result.tag_array2->power.writeOp.dynamic *= sckRation; + local_result.tag_array2->power.searchOp.dynamic *= sckRation; + local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.tag_array2->power.readOp.power_gated_leakage *= l_ip.nbanks; + local_result.tag_array2->power.readOp.longer_channel_leakage = + local_result.tag_array2->power.readOp.leakage * + long_channel_device_reduction; + + local_result.tag_array2->power.readOp + .power_gated_with_long_channel_leakage = + local_result.tag_array2->power.readOp.power_gated_leakage * + long_channel_device_reduction; + local_result.tag_array2->power = local_result.tag_array2->power * pppm_t; + } } -void ArrayST::leakage_feedback(double temperature)//TODO: add the code to process power-gating leakage +void ArrayST::leakage_feedback( + double temperature) // TODO: add the code to process power-gating leakage { - // Update the temperature. l_ip is already set and error-checked in the creator function. - l_ip.temp = (unsigned int)round(temperature/10.0)*10; + // Update the temperature. l_ip is already set and error-checked in the + // creator function. 
+ l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; - // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here. - reconfigure(&l_ip,&local_result); + // This corresponds to cacti_interface() in the initialization process. + // Leakage power is updated here. + reconfigure(&l_ip, &local_result); // Scale the power values. This is part of ArrayST::optimize_array(). - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - double total_overhead = macro_layout_overhead*chip_PR_overhead; + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + double total_overhead = macro_layout_overhead * chip_PR_overhead; - double pppm_t[4] = {total_overhead,1,1,total_overhead}; + double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; double sckRation = g_tp.sckt_co_eff; local_result.power.readOp.dynamic *= sckRation; local_result.power.writeOp.dynamic *= sckRation; local_result.power.searchOp.dynamic *= sckRation; local_result.power.readOp.leakage *= l_ip.nbanks; - local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction; - local_result.power = local_result.power* pppm_t; + local_result.power.readOp.longer_channel_leakage = + local_result.power.readOp.leakage * long_channel_device_reduction; + local_result.power = local_result.power * pppm_t; local_result.data_array2->power.readOp.dynamic *= sckRation; local_result.data_array2->power.writeOp.dynamic *= sckRation; local_result.data_array2->power.searchOp.dynamic *= sckRation; local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.data_array2->power.readOp.longer_channel_leakage = 
local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.data_array2->power = local_result.data_array2->power* pppm_t; + local_result.data_array2->power.readOp.longer_channel_leakage = + local_result.data_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.data_array2->power = local_result.data_array2->power * pppm_t; - if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) - { + if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { local_result.tag_array2->power.readOp.dynamic *= sckRation; local_result.tag_array2->power.writeOp.dynamic *= sckRation; local_result.tag_array2->power.searchOp.dynamic *= sckRation; local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; + local_result.tag_array2->power.readOp.longer_channel_leakage = + local_result.tag_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.tag_array2->power = local_result.tag_array2->power * pppm_t; } } -ArrayST:: ~ArrayST() -{ - local_result.cleanup(); -} +ArrayST::~ArrayST() { local_result.cleanup(); } diff --git a/array.h b/array.h index 8918c67..6151ea4 100644 --- a/array.h +++ b/array.h @@ -33,68 +33,86 @@ #define ARRAY_H_ #include "basic_components.h" -#include "const.h" #include "cacti_interface.h" -#include "parameter.h" #include "component.h" +#include "const.h" +#include "parameter.h" + #include #include using namespace std; -class ArrayST :public Component{ - public: +class ArrayST : public Component { +public: ArrayST(){}; - ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true); + ArrayST(const InputParameter 
*configure_interface, string _name, + enum Device_ty device_ty_, bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, bool _is_default = true); InputParameter l_ip; - string name; + string name; enum Device_ty device_ty; bool opt_local; enum Core_type core_ty; - bool is_default; - uca_org_t local_result; + bool is_default; + uca_org_t local_result; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; virtual void optimize_array(); virtual void compute_base_power(); virtual ~ArrayST(); - + void leakage_feedback(double temperature); }; -class InstCache :public Component{ +class InstCache : public Component { public: - ArrayST* caches; - ArrayST* missb; - ArrayST* ifb; - ArrayST* prefetchb; - powerDef power_t;//temp value holder for both (max) power and runtime power - InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;}; - ~InstCache(){ - if (caches) {//caches->local_result.cleanup(); - delete caches; caches=0;} - if (missb) {//missb->local_result.cleanup(); - delete missb; missb=0;} - if (ifb) {//ifb->local_result.cleanup(); - delete ifb; ifb=0;} - if (prefetchb) {//prefetchb->local_result.cleanup(); - delete prefetchb; prefetchb=0;} - }; + ArrayST *caches; + ArrayST *missb; + ArrayST *ifb; + ArrayST *prefetchb; + powerDef power_t; // temp value holder for both (max) power and runtime power + InstCache() { + caches = 0; + missb = 0; + ifb = 0; + prefetchb = 0; + }; + ~InstCache() { + if (caches) { // caches->local_result.cleanup(); + delete caches; + caches = 0; + } + if (missb) { // missb->local_result.cleanup(); + delete missb; + missb = 0; + } + if (ifb) { // ifb->local_result.cleanup(); + delete ifb; + ifb = 0; + } + if (prefetchb) { // prefetchb->local_result.cleanup(); + delete prefetchb; + prefetchb = 0; + } + }; }; -class DataCache :public InstCache{ +class DataCache : public InstCache { public: - ArrayST* wbb; - DataCache(){wbb=0;}; - 
~DataCache(){ - if (wbb) {//wbb->local_result.cleanup(); - delete wbb; wbb=0;} - }; + ArrayST *wbb; + DataCache() { wbb = 0; }; + ~DataCache() { + if (wbb) { // wbb->local_result.cleanup(); + delete wbb; + wbb = 0; + } + }; }; #endif /* TLB_H_ */ diff --git a/basic_components.cc b/basic_components.cc index 371cf23..8e87cff 100644 --- a/basic_components.cc +++ b/basic_components.cc @@ -30,119 +30,107 @@ ***************************************************************************/ #include "basic_components.h" -#include + #include #include +#include -double longer_channel_device_reduction( - enum Device_ty device_ty, - enum Core_type core_ty) -{ - - double longer_channel_device_percentage_core; - double longer_channel_device_percentage_uncore; - double longer_channel_device_percentage_llc; - - double long_channel_device_reduction; - - longer_channel_device_percentage_llc = 1.0; - longer_channel_device_percentage_uncore = 0.82; - if (core_ty==OOO) - { - longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam - //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam - - } - else - { - longer_channel_device_percentage_core = 0.8;//0.8;//Niagara - //longer_channel_device_percentage_uncore = 0.9;//Niagara - } - - if (device_ty==Core_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_core) - + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==Uncore_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_uncore) - + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==LLC_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_llc) - + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction; - } - else - { - cout<<"unknown device category"< const double cdb_overhead = 1.1; -enum FU_type { - FPU, - ALU, 
- MUL -}; +enum FU_type { FPU, ALU, MUL }; -enum Renaming_type { - RAMbased, - CAMbased -}; +enum Renaming_type { RAMbased, CAMbased }; -enum Scheduler_type { - PhysicalRegFile, - ReservationStation -}; +enum Scheduler_type { PhysicalRegFile, ReservationStation }; -enum cache_level { - L2, - L3, - L1Directory, - L2Directory -}; +enum cache_level { L2, L3, L1Directory, L2Directory }; enum MemoryCtrl_type { - MC, //memory controller - FLASHC //flash controller + MC, // memory controller + FLASHC // flash controller }; enum Dir_type { - ST,//shadowed tag - DC,//directory cache - SBT,//static bank tag - NonDir + ST, // shadowed tag + DC, // directory cache + SBT, // static bank tag + NonDir }; -enum Cache_policy { - Write_through, - Write_back -}; +enum Cache_policy { Write_through, Write_back }; -enum Device_ty { - Core_device, - Uncore_device, - LLC_device -}; +enum Device_ty { Core_device, Uncore_device, LLC_device }; -enum Core_type { - OOO, - Inorder -}; +enum Core_type { OOO, Inorder }; -class statsComponents -{ - public: - double access; - double hit; - double miss; +class statsComponents { +public: + double access; + double hit; + double miss; - statsComponents() : access(0), hit(0), miss(0) {} - statsComponents(const statsComponents & obj) { *this = obj; } - statsComponents & operator=(const statsComponents & rhs) - { - access = rhs.access; - hit = rhs.hit; - miss = rhs.miss; - return *this; - } - void reset() { access = 0; hit = 0; miss = 0;} + statsComponents() : access(0), hit(0), miss(0) {} + statsComponents(const statsComponents &obj) { *this = obj; } + statsComponents &operator=(const statsComponents &rhs) { + access = rhs.access; + hit = rhs.hit; + miss = rhs.miss; + return *this; + } + void reset() { + access = 0; + hit = 0; + miss = 0; + } - friend statsComponents operator+(const statsComponents & x, const statsComponents & y); - friend statsComponents operator*(const statsComponents & x, double const * const y); + friend statsComponents 
operator+(const statsComponents &x, + const statsComponents &y); + friend statsComponents operator*(const statsComponents &x, + double const *const y); }; -class statsDef -{ - public: - statsComponents readAc; - statsComponents writeAc; - statsComponents searchAc; +class statsDef { +public: + statsComponents readAc; + statsComponents writeAc; + statsComponents searchAc; - statsDef() : readAc(), writeAc(),searchAc() { } - void reset() { readAc.reset(); writeAc.reset();searchAc.reset();} + statsDef() : readAc(), writeAc(), searchAc() {} + void reset() { + readAc.reset(); + writeAc.reset(); + searchAc.reset(); + } - friend statsDef operator+(const statsDef & x, const statsDef & y); - friend statsDef operator*(const statsDef & x, double const * const y); + friend statsDef operator+(const statsDef &x, const statsDef &y); + friend statsDef operator*(const statsDef &x, double const *const y); }; -double longer_channel_device_reduction( - enum Device_ty device_ty=Core_device, - enum Core_type core_ty=Inorder); +double longer_channel_device_reduction(enum Device_ty device_ty = Core_device, + enum Core_type core_ty = Inorder); -double power_gating_leakage_reduction( - bool retain_state=false); +double power_gating_leakage_reduction(bool retain_state = false); class CoreDynParam { public: - CoreDynParam(){}; - CoreDynParam(ParseXML *XML_interface, int ithCore_); - // :XML(XML_interface), - // ithCore(ithCore_) - // core_ty(inorder), - // rm_ty(CAMbased), - // scheu_ty(PhysicalRegFile), - // clockRate(1e9),//1GHz - // arch_ireg_width(32), - // arch_freg_width(32), - // phy_ireg_width(128), - // phy_freg_width(128), - // perThreadState(8), - // globalCheckpoint(32), - // instructionLength(32){}; - //ParseXML * XML; - bool opt_local; - bool x86; - bool Embedded; - enum Core_type core_ty; - enum Renaming_type rm_ty; - enum Scheduler_type scheu_ty; - double clockRate,executionTime; - int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width, hthread_width; - int 
num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries; - int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW; - int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length; - int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines; - int num_alus, num_muls; - double num_fpus; - int int_data_width, fp_data_width,v_address_width, p_address_width; - double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles; - bool regWindowing,multithreaded; - double pppm_lkg_multhread[4]; - double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle, - MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle, - FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle, - FPU_cdb_duty_cycle; - double vdd; - double power_gating_vcc; - ~CoreDynParam(){}; + CoreDynParam(){}; + CoreDynParam(ParseXML *XML_interface, int ithCore_); + // :XML(XML_interface), + // ithCore(ithCore_) + // core_ty(inorder), + // rm_ty(CAMbased), + // scheu_ty(PhysicalRegFile), + // clockRate(1e9),//1GHz + // arch_ireg_width(32), + // arch_freg_width(32), + // phy_ireg_width(128), + // phy_freg_width(128), + // perThreadState(8), + // globalCheckpoint(32), + // instructionLength(32){}; + // ParseXML * XML; + bool opt_local; + bool x86; + bool Embedded; + enum Core_type core_ty; + enum Renaming_type rm_ty; + enum Scheduler_type scheu_ty; + double clockRate, executionTime; + int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width, + hthread_width; + int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, + num_ffreelist_entries; + int fetchW, decodeW, issueW, peak_issueW, commitW, peak_commitW, predictionW, + fp_issueW, fp_decodeW; + int perThreadState, globalCheckpoint, instruction_length, pc_width, + opcode_length, micro_opcode_length; + int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, + num_fp_pipelines; + int num_alus, 
num_muls; + double num_fpus; + int int_data_width, fp_data_width, v_address_width, p_address_width; + double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles; + bool regWindowing, multithreaded; + double pppm_lkg_multhread[4]; + double IFU_duty_cycle, BR_duty_cycle, LSU_duty_cycle, MemManU_I_duty_cycle, + MemManU_D_duty_cycle, ALU_duty_cycle, MUL_duty_cycle, FPU_duty_cycle, + ALU_cdb_duty_cycle, MUL_cdb_duty_cycle, FPU_cdb_duty_cycle; + double vdd; + double power_gating_vcc; + ~CoreDynParam(){}; }; class CacheDynParam { public: - CacheDynParam(){}; - CacheDynParam(ParseXML *XML_interface, int ithCache_); - string name; - enum Dir_type dir_ty; - double clockRate,executionTime; - double capacity, blockW, assoc, nbanks; - double throughput, latency; - double duty_cycle, dir_duty_cycle; - //double duty_cycle; - int missb_size, fu_size, prefetchb_size, wbb_size; - double vdd; - double power_gating_vcc; - ~CacheDynParam(){}; + CacheDynParam(){}; + CacheDynParam(ParseXML *XML_interface, int ithCache_); + string name; + enum Dir_type dir_ty; + double clockRate, executionTime; + double capacity, blockW, assoc, nbanks; + double throughput, latency; + double duty_cycle, dir_duty_cycle; + // double duty_cycle; + int missb_size, fu_size, prefetchb_size, wbb_size; + double vdd; + double power_gating_vcc; + ~CacheDynParam(){}; }; class MCParam { public: - MCParam(){}; - MCParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate,num_mcs, peakDataTransferRate, num_channels; - // double mcTEPowerperGhz; - // double mcPHYperGbit; - // double area; - int llcBlockSize, dataBusWidth, addressBusWidth; - int opcodeW; - int memAccesses; - int memRank; - int type; - double frontend_duty_cycle, duty_cycle, perc_load; - double executionTime, reads, writes; - bool LVDS, withPHY; - double vdd; - double power_gating_vcc; - ~MCParam(){}; + MCParam(){}; + MCParam(ParseXML *XML_interface, int ithCache_); + string name; + double clockRate, num_mcs, 
peakDataTransferRate, num_channels; + // double mcTEPowerperGhz; + // double mcPHYperGbit; + // double area; + int llcBlockSize, dataBusWidth, addressBusWidth; + int opcodeW; + int memAccesses; + int memRank; + int type; + double frontend_duty_cycle, duty_cycle, perc_load; + double executionTime, reads, writes; + bool LVDS, withPHY; + double vdd; + double power_gating_vcc; + ~MCParam(){}; }; class NoCParam { public: - NoCParam(){}; - NoCParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int flit_size; - int input_ports, output_ports, min_ports, global_linked_ports; - int virtual_channel_per_port,input_buffer_entries_per_vc; - int horizontal_nodes,vertical_nodes, total_nodes; - double executionTime, total_access, link_throughput,link_latency, - duty_cycle, chip_coverage, route_over_perc; - bool has_global_link, type; - double vdd; - double power_gating_vcc; - ~NoCParam(){}; + NoCParam(){}; + NoCParam(ParseXML *XML_interface, int ithCache_); + string name; + double clockRate; + int flit_size; + int input_ports, output_ports, min_ports, global_linked_ports; + int virtual_channel_per_port, input_buffer_entries_per_vc; + int horizontal_nodes, vertical_nodes, total_nodes; + double executionTime, total_access, link_throughput, link_latency, duty_cycle, + chip_coverage, route_over_perc; + bool has_global_link, type; + double vdd; + double power_gating_vcc; + ~NoCParam(){}; }; class ProcParam { public: - ProcParam(){}; - ProcParam(ParseXML *XML_interface, int ithCache_); - string name; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel; - bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir; - double vdd; - double power_gating_vcc; - ~ProcParam(){}; + ProcParam(){}; + ProcParam(ParseXML *XML_interface, int ithCache_); + string name; + int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir, numMC, numMCChannel; + bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir; + double vdd; + double 
power_gating_vcc; + ~ProcParam(){}; }; class NIUParam { public: - NIUParam(){}; - NIUParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int num_units; - int type; - double duty_cycle, perc_load; - double vdd; - double power_gating_vcc; - ~NIUParam(){}; + NIUParam(){}; + NIUParam(ParseXML *XML_interface, int ithCache_); + string name; + double clockRate; + int num_units; + int type; + double duty_cycle, perc_load; + double vdd; + double power_gating_vcc; + ~NIUParam(){}; }; class PCIeParam { public: - PCIeParam(){}; - PCIeParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int num_channels, num_units; - bool withPHY; - int type; - double duty_cycle, perc_load; - double vdd; - double power_gating_vcc; - ~PCIeParam(){}; + PCIeParam(){}; + PCIeParam(ParseXML *XML_interface, int ithCache_); + string name; + double clockRate; + int num_channels, num_units; + bool withPHY; + int type; + double duty_cycle, perc_load; + double vdd; + double power_gating_vcc; + ~PCIeParam(){}; }; #endif /* BASIC_COMPONENTS_H_ */ diff --git a/cacti/Ucache.cc b/cacti/Ucache.cc index 946ed9d..5b844a9 100644 --- a/cacti/Ucache.cc +++ b/cacti/Ucache.cc @@ -29,10 +29,7 @@ * ***************************************************************************/ - -#include -#include - +#include "Ucache.h" #include "area.h" #include "bank.h" @@ -41,73 +38,80 @@ #include "const.h" #include "decoder.h" #include "parameter.h" -#include "Ucache.h" #include "subarray.h" #include "uca.h" -#include -#include #include +#include #include +#include +#include +#include using namespace std; const uint32_t nthreads = NTHREADS; - -void min_values_t::update_min_values(const min_values_t * val) -{ - min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; - min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; - min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; - min_area = (min_area > val->min_area) ? 
val->min_area : min_area; - min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; +void min_values_t::update_min_values(const min_values_t *val) { + min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; + min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; + min_leakage = + (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; + min_area = (min_area > val->min_area) ? val->min_area : min_area; + min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; } - - -void min_values_t::update_min_values(const uca_org_t & res) -{ - min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; - min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; - min_area = (min_area > res.area) ? res.area : min_area; - min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; +void min_values_t::update_min_values(const uca_org_t &res) { + min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; + min_dyn = + (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res.power.readOp.leakage) + ? res.power.readOp.leakage + : min_leakage; + min_area = (min_area > res.area) ? res.area : min_area; + min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; } -void min_values_t::update_min_values(const nuca_org_t * res) -{ - min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; - min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; - min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; - min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? 
res->nuca_pda.cycle_time : min_cyc; +void min_values_t::update_min_values(const nuca_org_t *res) { + min_delay = + (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; + min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) + ? res->nuca_pda.power.readOp.dynamic + : min_dyn; + min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) + ? res->nuca_pda.power.readOp.leakage + : min_leakage; + min_area = (min_area > res->nuca_pda.area.get_area()) + ? res->nuca_pda.area.get_area() + : min_area; + min_cyc = + (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; } -void min_values_t::update_min_values(const mem_array * res) -{ - min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; - min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; - min_area = (min_area > res->area) ? res->area : min_area; - min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc; +void min_values_t::update_min_values(const mem_array *res) { + min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; + min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic + : min_dyn; + min_leakage = (min_leakage > res->power.readOp.leakage) + ? res->power.readOp.leakage + : min_leakage; + min_area = (min_area > res->area) ? res->area : min_area; + min_cyc = (min_cyc > res->cycle_time) ? 
res->cycle_time : min_cyc; } - - -void * calc_time_mt_wrapper(void * void_obj) -{ - calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; - uint32_t tid = calc_obj->tid; - list & data_arr = calc_obj->data_arr; - list & tag_arr = calc_obj->tag_arr; - bool is_tag = calc_obj->is_tag; - bool pure_ram = calc_obj->pure_ram; - bool pure_cam = calc_obj->pure_cam; - bool is_main_mem = calc_obj->is_main_mem; - double Nspd_min = calc_obj->Nspd_min; - min_values_t * data_res = calc_obj->data_res; - min_values_t * tag_res = calc_obj->tag_res; +void *calc_time_mt_wrapper(void *void_obj) { + calc_time_mt_wrapper_struct *calc_obj = + (calc_time_mt_wrapper_struct *)void_obj; + uint32_t tid = calc_obj->tid; + list &data_arr = calc_obj->data_arr; + list &tag_arr = calc_obj->tag_arr; + bool is_tag = calc_obj->is_tag; + bool pure_ram = calc_obj->pure_ram; + bool pure_cam = calc_obj->pure_cam; + bool is_main_mem = calc_obj->is_main_mem; + double Nspd_min = calc_obj->Nspd_min; + min_values_t *data_res = calc_obj->data_res; + min_values_t *tag_res = calc_obj->tag_res; data_arr.clear(); data_arr.push_back(new mem_array); @@ -117,8 +121,7 @@ void * calc_time_mt_wrapper(void * void_obj) uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; - uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; - + uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; bool is_valid_partition; int wt_min, wt_max; @@ -127,91 +130,80 @@ void * calc_time_mt_wrapper(void * void_obj) if (g_ip->wt == 0) { wt_min = Low_swing; wt_max = Low_swing; - } - else { + } else { wt_min = Global; - wt_max = Low_swing-1; + wt_max = Low_swing - 1; } - } - else { + } else { wt_min = Global; wt_max = Low_swing; } - for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) - { - for (int wr = wt_min; wr <= wt_max; wr++) - { - for (uint32_t iter = tid; iter < niter; iter += nthreads) - { + for (double Nspd = Nspd_min; Nspd 
<= MAXDATASPD; Nspd *= 2) { + for (int wr = wt_min; wr <= wt_max; wr++) { + for (uint32_t iter = tid; iter < niter; iter += nthreads) { // reconstruct Ndwl, Ndbl, Ndcm unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); - unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter); + unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter); unsigned int Ndcm = 1 << (iter % Ndcm_niter); - for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2) - { - for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) - { - //for debuging - if (g_ip->force_cache_config && is_tag == false) - { - wr = g_ip->wt; + for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; + Ndsam_lev_1 *= 2) { + for (unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; + Ndsam_lev_2 *= 2) { + // for debuging + if (g_ip->force_cache_config && is_tag == false) { + wr = g_ip->wt; Ndwl = g_ip->ndwl; Ndbl = g_ip->ndbl; Ndcm = g_ip->ndcm; - if(g_ip->nspd != 0) { - Nspd = g_ip->nspd; + if (g_ip->nspd != 0) { + Nspd = g_ip->nspd; } - if(g_ip->ndsam1 != 0) { - Ndsam_lev_1 = g_ip->ndsam1; - Ndsam_lev_2 = g_ip->ndsam2; + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = g_ip->ndsam1; + Ndsam_lev_2 = g_ip->ndsam2; } } - if (is_tag == true) - { - is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - tag_arr.back(), 0, NULL, NULL, - is_main_mem); + if (is_tag == true) { + is_valid_partition = + calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, + Ndcm, Ndsam_lev_1, Ndsam_lev_2, tag_arr.back(), + 0, NULL, NULL, is_main_mem); } - // If it's a fully-associative cache, the data array partition parameters are identical to that of - // the tag array, so compute data array partition properties also here. 
- if (is_tag == false || g_ip->fully_assoc) - { - is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - data_arr.back(), 0, NULL, NULL, - is_main_mem); + // If it's a fully-associative cache, the data array partition + // parameters are identical to that of the tag array, so compute + // data array partition properties also here. + if (is_tag == false || g_ip->fully_assoc) { + is_valid_partition = + calculate_time(is_tag /*false*/, pure_ram, pure_cam, Nspd, + Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + data_arr.back(), 0, NULL, NULL, is_main_mem); } - if (is_valid_partition) - { - if (is_tag == true) - { - tag_arr.back()->wt = (enum Wire_type) wr; + if (is_valid_partition) { + if (is_tag == true) { + tag_arr.back()->wt = (enum Wire_type)wr; tag_res->update_min_values(tag_arr.back()); tag_arr.push_back(new mem_array); } - if (is_tag == false || g_ip->fully_assoc) - { - data_arr.back()->wt = (enum Wire_type) wr; + if (is_tag == false || g_ip->fully_assoc) { + data_arr.back()->wt = (enum Wire_type)wr; data_res->update_min_values(data_arr.back()); data_arr.push_back(new mem_array); } } - if (g_ip->force_cache_config && is_tag == false) - { - wr = wt_max; - iter = niter; - if(g_ip->nspd != 0) { - Nspd = MAXDATASPD; - } - if (g_ip->ndsam1 != 0) { - Ndsam_lev_1 = MAX_COL_MUX+1; - Ndsam_lev_2 = MAX_COL_MUX+1; - } + if (g_ip->force_cache_config && is_tag == false) { + wr = wt_max; + iter = niter; + if (g_ip->nspd != 0) { + Nspd = MAXDATASPD; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = MAX_COL_MUX + 1; + Ndsam_lev_2 = MAX_COL_MUX + 1; + } } } } @@ -227,41 +219,28 @@ void * calc_time_mt_wrapper(void * void_obj) pthread_exit(NULL); } +bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, + unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, + unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, + mem_array *ptr_array, int flag_results_populate, + results_mem_array 
*ptr_results, uca_org_t *ptr_fin_res, + bool is_main_mem) { + DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, + Ndsam_lev_1, Ndsam_lev_2, is_main_mem); - -bool calculate_time( - bool is_tag, - int pure_ram, - bool pure_cam, - double Nspd, - unsigned int Ndwl, - unsigned int Ndbl, - unsigned int Ndcm, - unsigned int Ndsam_lev_1, - unsigned int Ndsam_lev_2, - mem_array *ptr_array, - int flag_results_populate, - results_mem_array *ptr_results, - uca_org_t *ptr_fin_res, - bool is_main_mem) -{ - DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); - - if (dyn_p.is_valid == false) - { + if (dyn_p.is_valid == false) { return false; } - UCA * uca = new UCA(dyn_p); + UCA *uca = new UCA(dyn_p); + if (flag_results_populate) { // For the final solution, populate the + // ptr_results data structure -- TODO: copy only + // necessary variables + } else { - if (flag_results_populate) - { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables - } - else - { - - collect_uca_results(Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, uca, ptr_array, is_main_mem); + collect_uca_results(Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, uca, + ptr_array, is_main_mem); } delete uca; @@ -269,415 +248,437 @@ bool calculate_time( } void collect_uca_results( -// bool is_tag, -// int pure_ram, -// bool pure_cam, - double Nspd, - unsigned int Ndwl, - unsigned int Ndbl, - unsigned int Ndcm, - unsigned int Ndsam_lev_1, - unsigned int Ndsam_lev_2, - UCA const * const uca, - mem_array * const ptr_array, -// int flag_results_populate, -// results_mem_array *ptr_results, -// uca_org_t *ptr_fin_res, - bool is_main_mem) -{ - int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; - int num_mats = uca->bank.dp.num_mats; - bool is_fa = uca->bank.dp.fully_assoc; - bool pure_cam = uca->bank.dp.pure_cam; - ptr_array->Ndwl = Ndwl; - ptr_array->Ndbl = Ndbl; - 
ptr_array->Nspd = Nspd; - ptr_array->deg_bl_muxing = uca->bank.dp.deg_bl_muxing; - ptr_array->Ndsam_lev_1 = Ndsam_lev_1; - ptr_array->Ndsam_lev_2 = Ndsam_lev_2; - ptr_array->access_time = uca->access_time; - ptr_array->cycle_time = uca->cycle_time; - ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time; - ptr_array->area_ram_cells = uca->area_all_dataramcells; - ptr_array->area = uca->area.get_area(); - ptr_array->height = uca->area.h; - ptr_array->width = uca->area.w; - ptr_array->mat_height = uca->bank.mat.area.h; - ptr_array->mat_length = uca->bank.mat.area.w; - ptr_array->subarray_height = uca->bank.mat.subarray.area.h; - ptr_array->subarray_length = uca->bank.mat.subarray.area.w; - ptr_array->power = uca->power; - ptr_array->delay_senseamp_mux_decoder = + // bool is_tag, + // int pure_ram, + // bool pure_cam, + double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, + unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, UCA const *const uca, + mem_array *const ptr_array, + // int flag_results_populate, + // results_mem_array *ptr_results, + // uca_org_t *ptr_fin_res, + bool is_main_mem) { + int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; + int num_mats = uca->bank.dp.num_mats; + bool is_fa = uca->bank.dp.fully_assoc; + bool pure_cam = uca->bank.dp.pure_cam; + ptr_array->Ndwl = Ndwl; + ptr_array->Ndbl = Ndbl; + ptr_array->Nspd = Nspd; + ptr_array->deg_bl_muxing = uca->bank.dp.deg_bl_muxing; + ptr_array->Ndsam_lev_1 = Ndsam_lev_1; + ptr_array->Ndsam_lev_2 = Ndsam_lev_2; + ptr_array->access_time = uca->access_time; + ptr_array->cycle_time = uca->cycle_time; + ptr_array->multisubbank_interleave_cycle_time = + uca->multisubbank_interleave_cycle_time; + ptr_array->area_ram_cells = uca->area_all_dataramcells; + ptr_array->area = uca->area.get_area(); + ptr_array->height = uca->area.h; + ptr_array->width = uca->area.w; + ptr_array->mat_height = uca->bank.mat.area.h; + ptr_array->mat_length = 
uca->bank.mat.area.w; + ptr_array->subarray_height = uca->bank.mat.subarray.area.h; + ptr_array->subarray_length = uca->bank.mat.subarray.area.w; + ptr_array->power = uca->power; + ptr_array->delay_senseamp_mux_decoder = MAX(uca->delay_array_to_sa_mux_lev_1_decoder, uca->delay_array_to_sa_mux_lev_2_decoder); - ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver; - ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out; - - ptr_array->delay_route_to_bank = uca->htree_in_add->delay; - ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; - ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay; - ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; - ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; - ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; - ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; - ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree; - ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; - ptr_array->delay_comparator = uca->bank.mat.delay_comparator; - - ptr_array->all_banks_height = uca->area.h; - ptr_array->all_banks_width = uca->area.w; - ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area()); - - ptr_array->power_routing_to_bank = uca->power_routing_to_bank; - ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; - ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; -// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; -// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power; - ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= 
num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; - ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; - ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power; - ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power; - ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; - ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers 
.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders; - ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders; - ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir; - 
ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bitlines = uca->bank.mat.power_bitline; - ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_sense_amps = uca->bank.mat.power_sa; - ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; - ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv; - ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_comparators = uca->bank.mat.power_comparator; - ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; - -// cout << " num of mats: " << dyn_p.num_mats << endl; - if (is_fa || pure_cam) - { + ptr_array->delay_before_subarray_output_driver = + uca->delay_before_subarray_output_driver; + ptr_array->delay_from_subarray_output_driver_to_output = + uca->delay_from_subarray_out_drv_to_out; + + ptr_array->delay_route_to_bank = uca->htree_in_add->delay; + ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; + 
ptr_array->delay_row_predecode_driver_and_block = + uca->bank.mat.r_predec->delay; + ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; + ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; + ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; + ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; + ptr_array->delay_subarray_output_driver = + uca->bank.mat.delay_subarray_out_drv_htree; + ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; + ptr_array->delay_comparator = uca->bank.mat.delay_comparator; + + ptr_array->all_banks_height = uca->area.h; + ptr_array->all_banks_width = uca->area.w; + ptr_array->area_efficiency = + uca->area_all_dataramcells * 100 / (uca->area.get_area()); + + ptr_array->power_routing_to_bank = uca->power_routing_to_bank; + ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; + ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; + // cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; + // cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = + uca->bank.mat.r_predec->driver_power; + ptr_array->power_row_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; + ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; + ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.writeOp.dynamic *= 
num_act_mats_hor_dir; + ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_drivers = + uca->bank.mat.b_mux_predec->driver_power; + ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_blocks = + uca->bank.mat.b_mux_predec->block_power; + ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; + ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = + uca->bank.mat.sa_mux_lev_1_predec->driver_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = + uca->bank.mat.sa_mux_lev_1_predec->block_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_decoders = + 
uca->bank.mat.power_sa_mux_lev_1_decoders; + ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = + uca->bank.mat.sa_mux_lev_2_predec->driver_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = + uca->bank.mat.sa_mux_lev_2_predec->block_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_decoders = + uca->bank.mat.power_sa_mux_lev_2_decoders; + ptr_array->power_senseamp_mux_lev_2_decoders.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_bitlines = uca->bank.mat.power_bitline; + ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_sense_amps = uca->bank.mat.power_sa; + ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; + 
ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; + ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_output_drivers_at_subarray = + uca->bank.mat.power_subarray_out_drv; + ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_comparators = uca->bank.mat.power_comparator; + ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; + + // cout << " num of mats: " << dyn_p.num_mats << endl; + if (is_fa || pure_cam) { ptr_array->power_htree_in_search = uca->bank.htree_in_search->power; -// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power; -// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline; -// cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats; - ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge; + ptr_array->power_searchline_precharge = + uca->bank.mat.power_searchline_precharge; ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; ptr_array->power_matchlines = uca->bank.mat.power_matchline; 
ptr_array->power_matchlines.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge; + ptr_array->power_matchline_precharge = + uca->bank.mat.power_matchline_precharge; ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv; -// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy; - ptr_array->read_energy = uca->read_energy; - ptr_array->write_energy = uca->write_energy; - ptr_array->precharge_energy = uca->precharge_energy; - ptr_array->refresh_power = uca->refresh_power; - ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page; - ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; - ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks; - - ptr_array->precharge_delay = uca->precharge_delay; - - -// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<array_leakage= uca->bank.array_leakage; - ptr_array->wl_leakage= uca->bank.wl_leakage; - ptr_array->cl_leakage= uca->bank.cl_leakage; - if (g_ip->power_gating) - { - ptr_array->sram_sleep_tx_width= uca->bank.mat.sram_sleep_tx->width; - ptr_array->sram_sleep_tx_area= uca->bank.mat.array_sleep_tx_area; - ptr_array->sram_sleep_wakeup_latency= uca->bank.mat.array_wakeup_t; - ptr_array->sram_sleep_wakeup_energy= uca->bank.mat.array_wakeup_e.readOp.dynamic; - - ptr_array->wl_sleep_tx_width= uca->bank.mat.row_dec->sleeptx->width; - ptr_array->wl_sleep_tx_area= uca->bank.mat.wl_sleep_tx_area; - ptr_array->wl_sleep_wakeup_latency= uca->bank.mat.wl_wakeup_t; - ptr_array->wl_sleep_wakeup_energy= uca->bank.mat.wl_wakeup_e.readOp.dynamic; - - ptr_array->bl_floating_wakeup_latency= uca->bank.mat.blfloating_wakeup_t; - ptr_array->bl_floating_wakeup_energy= 
uca->bank.mat.blfloating_wakeup_e.readOp.dynamic; - - - } - + ptr_array->power_matchline_to_wordline_drv = + uca->bank.mat.power_ml_to_ram_wl_drv; + // cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<num_active_mats = uca->bank.dp.num_act_mats_hor_dir; - ptr_array->num_submarray_mats = uca->bank.mat.num_subarrays_per_mat; - // cout<<"array_leakage"<array_leakage<wl_leakage<cl_leakage<activate_energy = uca->activate_energy; + ptr_array->read_energy = uca->read_energy; + ptr_array->write_energy = uca->write_energy; + ptr_array->precharge_energy = uca->precharge_energy; + ptr_array->refresh_power = uca->refresh_power; + ptr_array->leak_power_subbank_closed_page = + uca->leak_power_subbank_closed_page; + ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; + ptr_array->leak_power_request_and_reply_networks = + uca->leak_power_request_and_reply_networks; + + ptr_array->precharge_delay = uca->precharge_delay; + + // cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<array_leakage = uca->bank.array_leakage; + ptr_array->wl_leakage = uca->bank.wl_leakage; + ptr_array->cl_leakage = uca->bank.cl_leakage; + if (g_ip->power_gating) { + ptr_array->sram_sleep_tx_width = uca->bank.mat.sram_sleep_tx->width; + ptr_array->sram_sleep_tx_area = uca->bank.mat.array_sleep_tx_area; + ptr_array->sram_sleep_wakeup_latency = uca->bank.mat.array_wakeup_t; + ptr_array->sram_sleep_wakeup_energy = + uca->bank.mat.array_wakeup_e.readOp.dynamic; + + ptr_array->wl_sleep_tx_width = uca->bank.mat.row_dec->sleeptx->width; + ptr_array->wl_sleep_tx_area = uca->bank.mat.wl_sleep_tx_area; + ptr_array->wl_sleep_wakeup_latency = uca->bank.mat.wl_wakeup_t; + ptr_array->wl_sleep_wakeup_energy = + uca->bank.mat.wl_wakeup_e.readOp.dynamic; + + ptr_array->bl_floating_wakeup_latency = uca->bank.mat.blfloating_wakeup_t; + ptr_array->bl_floating_wakeup_energy = + uca->bank.mat.blfloating_wakeup_e.readOp.dynamic; + } 
- ptr_array->long_channel_leakage_reduction_periperal = uca->long_channel_leakage_reduction_periperal; - ptr_array->long_channel_leakage_reduction_memcell = uca->long_channel_leakage_reduction_memcell; + ptr_array->num_active_mats = uca->bank.dp.num_act_mats_hor_dir; + ptr_array->num_submarray_mats = uca->bank.mat.num_subarrays_per_mat; + // cout<<"array_leakage"<array_leakage<wl_leakage<cl_leakage<long_channel_leakage_reduction_periperal = + uca->long_channel_leakage_reduction_periperal; + ptr_array->long_channel_leakage_reduction_memcell = + uca->long_channel_leakage_reduction_memcell; } - -bool check_uca_org(uca_org_t & u, min_values_t *minval) -{ - if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { +bool check_uca_org(uca_org_t &u, min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { return false; } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn) * 100 > g_ip->dynamic_power_dev) { return false; } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > + if (((u.power.readOp.leakage - minval->min_leakage) / minval->min_leakage) * + 100 > g_ip->leakage_power_dev) { return false; } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 > g_ip->cycle_time_dev) { return false; } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { + if (((u.area - minval->min_area) / minval->min_area) * 100 > g_ip->area_dev) { return false; } return true; } -bool check_mem_org(mem_array & u, const min_values_t *minval) -{ - if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { +bool check_mem_org(mem_array &u, const min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { return 
false; } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn) * 100 > g_ip->dynamic_power_dev) { return false; } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > + if (((u.power.readOp.leakage - minval->min_leakage) / minval->min_leakage) * + 100 > g_ip->leakage_power_dev) { return false; } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 > g_ip->cycle_time_dev) { return false; } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { + if (((u.area - minval->min_area) / minval->min_area) * 100 > g_ip->area_dev) { return false; } return true; } - - - -void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist) -{ +void find_optimal_uca(uca_org_t *res, min_values_t *minval, + list &ulist) { double cost = 0; double min_cost = BIGNUM; float d, a, dp, lp, c; dp = g_ip->dynamic_power_wt; lp = g_ip->leakage_power_wt; - a = g_ip->area_wt; - d = g_ip->delay_wt; - c = g_ip->cycle_time_wt; + a = g_ip->area_wt; + d = g_ip->delay_wt; + c = g_ip->cycle_time_wt; - if (ulist.empty() == true) - { + if (ulist.empty() == true) { cout << "ERROR: no valid cache organizations found" << endl; exit(0); } - for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++) - { - if (g_ip->ed == 1) - { - cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { + for (list::iterator niter = ulist.begin(); niter != ulist.end(); + niter++) { + if (g_ip->ed == 1) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { min_cost = cost; *res = (*(niter)); } - } - else if (g_ip->ed == 2) - { - cost = ((niter)->access_time/minval->min_delay)* - ((niter)->access_time/minval->min_delay)* - 
((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { + } else if (g_ip->ed == 2) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { min_cost = cost; *res = (*(niter)); } - } - else - { + } else { /* * check whether the current organization * meets the input deviation constraints */ bool v = check_uca_org(*niter, minval); - //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + // if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove + // this after leakage modeling - if (v) - { - cost = (d * ((niter)->access_time/minval->min_delay) + - c * ((niter)->cycle_time/minval->min_cyc) + - dp * ((niter)->power.readOp.dynamic/minval->min_dyn) + - lp * ((niter)->power.readOp.leakage/minval->min_leakage) + - a * ((niter)->area/minval->min_area)); - //fprintf(stderr, "cost = %g\n", cost); + if (v) { + cost = (d * ((niter)->access_time / minval->min_delay) + + c * ((niter)->cycle_time / minval->min_cyc) + + dp * ((niter)->power.readOp.dynamic / minval->min_dyn) + + lp * ((niter)->power.readOp.leakage / minval->min_leakage) + + a * ((niter)->area / minval->min_area)); + // fprintf(stderr, "cost = %g\n", cost); if (min_cost > cost) { min_cost = cost; *res = (*(niter)); niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; + if (niter != ulist.begin()) + niter--; } - } - else { + } else { niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; + if (niter != ulist.begin()) + niter--; } } } - if (min_cost == BIGNUM) - { + if (min_cost == BIGNUM) { cout << "ERROR: no cache organizations met optimization criteria" << endl; exit(0); } } - - -void filter_tag_arr(const min_values_t * min, list & list) -{ +void filter_tag_arr(const min_values_t *min, list &list) { double cost = BIGNUM; double cur_cost; - double wt_delay = g_ip->delay_wt, wt_dyn = 
g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt; - mem_array * res = NULL; + double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, + wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, + wt_area = g_ip->area_wt; + mem_array *res = NULL; - if (list.empty() == true) - { + if (list.empty() == true) { cout << "ERROR: no valid tag organizations found" << endl; exit(1); } - - while (list.empty() != true) - { + while (list.empty() != true) { bool v = check_mem_org(*list.back(), min); - if (v) - { - cur_cost = wt_delay * (list.back()->access_time/min->min_delay) + - wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) + - wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) + - wt_area * (list.back()->area/min->min_area) + - wt_cyc * (list.back()->cycle_time/min->min_cyc); - } - else - { + if (v) { + cur_cost = + wt_delay * (list.back()->access_time / min->min_delay) + + wt_dyn * (list.back()->power.readOp.dynamic / min->min_dyn) + + wt_leakage * (list.back()->power.readOp.leakage / min->min_leakage) + + wt_area * (list.back()->area / min->min_area) + + wt_cyc * (list.back()->cycle_time / min->min_cyc); + } else { cur_cost = BIGNUM; } - if (cur_cost < cost) - { - if (res != NULL) - { + if (cur_cost < cost) { + if (res != NULL) { delete res; } cost = cur_cost; - res = list.back(); - } - else - { + res = list.back(); + } else { delete list.back(); } list.pop_back(); } - if(!res) - { + if (!res) { cout << "ERROR: no valid tag organizations found" << endl; exit(0); } @@ -685,36 +686,31 @@ void filter_tag_arr(const min_values_t * min, list & list) list.push_back(res); } - - -void filter_data_arr(list & curr_list) -{ - if (curr_list.empty() == true) - { +void filter_data_arr(list &curr_list) { + if (curr_list.empty() == true) { cout << "ERROR: no valid data array organizations found" << endl; exit(1); } list::iterator iter; - for (iter = curr_list.begin(); iter != 
curr_list.end(); ++iter) - { - mem_array * m = *iter; + for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) { + mem_array *m = *iter; - if (m == NULL) exit(1); + if (m == NULL) + exit(1); - if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) && - ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5)) - { + if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay > + 0.5) && + ((m->power.readOp.dynamic - m->arr_min->min_dyn) / m->arr_min->min_dyn > + 0.5)) { delete m; iter = curr_list.erase(iter); - iter --; + iter--; } } } - - /* * Performs exhaustive search across different sub-array sizes, * wire types and aspect ratios to find an optimal UCA organization @@ -727,16 +723,14 @@ void filter_data_arr(list & curr_list) * above results * 4. Cache model with least cost is picked from sol_list */ -void solve(uca_org_t *fin_res) -{ - bool is_dram = false; - int pure_ram = g_ip->pure_ram; - bool pure_cam = g_ip->pure_cam; +void solve(uca_org_t *fin_res) { + bool is_dram = false; + int pure_ram = g_ip->pure_ram; + bool pure_cam = g_ip->pure_cam; init_tech_params(g_ip->F_sz_um, false); - - list tag_arr (0); + list tag_arr(0); list data_arr(0); list::iterator miter; list sol_list(1, uca_org_t()); @@ -749,46 +743,44 @@ void solve(uca_org_t *fin_res) fin_res->tag_array.Ndsam_lev_1 = 0; fin_res->tag_array.Ndsam_lev_2 = 0; - // distribute calculate_time() execution to multiple threads - calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads]; + calc_time_mt_wrapper_struct *calc_array = + new calc_time_mt_wrapper_struct[nthreads]; pthread_t threads[nthreads]; - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].tid = t; - calc_array[t].pure_ram = pure_ram; - calc_array[t].pure_cam = pure_cam; - calc_array[t].data_res = new min_values_t(); - calc_array[t].tag_res = new min_values_t(); + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].tid = t; + 
calc_array[t].pure_ram = pure_ram; + calc_array[t].pure_cam = pure_cam; + calc_array[t].data_res = new min_values_t(); + calc_array[t].tag_res = new min_values_t(); } - bool is_tag; + bool is_tag; uint32_t ram_cell_tech_type; - // If it's a cache, first calculate the area, delay and power for all tag array partitions. - if (!(pure_ram||pure_cam||g_ip->fully_assoc)) - { //cache - is_tag = true; - ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + // If it's a cache, first calculate the area, delay and power for all tag + // array partitions. + if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { // cache + is_tag = true; + ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; + is_dram = + ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = false; - calc_array[t].Nspd_min = 0.125; - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); + calc_array[t].Nspd_min = 0.125; + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); } - for (uint32_t t = 0; t < nthreads; t++) - { + for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } - for (uint32_t t = 0; t < nthreads; t++) - { + for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].data_arr.sort(mem_array::lt); data_arr.merge(calc_array[t].data_arr, mem_array::lt); calc_array[t].tag_arr.sort(mem_array::lt); @@ -796,76 +788,68 @@ void solve(uca_org_t *fin_res) } } - - // calculate the area, delay and power for all data array partitions (for cache or plain RAM). 
-// if (!g_ip->fully_assoc) -// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion - is_tag = false; - ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - init_tech_params(g_ip->F_sz_um, is_tag); - - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; - calc_array[t].is_main_mem = g_ip->is_main_mem; - if (!(pure_cam||g_ip->fully_assoc)) - { - calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8); - } - else - { - calc_array[t].Nspd_min = 1; - } - - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); + // calculate the area, delay and power for all data array partitions (for + // cache or plain RAM). + // if (!g_ip->fully_assoc) + // {//in the new cacti, cam, fully_associative cache are processed as single + // array in the data portion + is_tag = false; + ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; + is_dram = + ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + init_tech_params(g_ip->F_sz_um, is_tag); + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam || g_ip->fully_assoc)) { + calc_array[t].Nspd_min = + (double)(g_ip->out_w) / (double)(g_ip->block_sz * 8); + } else { + calc_array[t].Nspd_min = 1; } - for (uint32_t t = 0; t < nthreads; t++) - { - pthread_join(threads[t], NULL); - } + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); + } - data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].data_arr.sort(mem_array::lt); - data_arr.merge(calc_array[t].data_arr, mem_array::lt); - } -// } + for (uint32_t t = 0; t < nthreads; t++) { + pthread_join(threads[t], NULL); + } + data_arr.clear(); + for (uint32_t t = 0; t < nthreads; t++) { + 
calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + } + // } - min_values_t * d_min = new min_values_t(); - min_values_t * t_min = new min_values_t(); - min_values_t * cache_min = new min_values_t(); + min_values_t *d_min = new min_values_t(); + min_values_t *t_min = new min_values_t(); + min_values_t *cache_min = new min_values_t(); - for (uint32_t t = 0; t < nthreads; t++) - { + for (uint32_t t = 0; t < nthreads; t++) { d_min->update_min_values(calc_array[t].data_res); t_min->update_min_values(calc_array[t].tag_res); } - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { (*miter)->arr_min = d_min; } - - //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; + // cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; filter_data_arr(data_arr); - if(!(pure_ram||pure_cam||g_ip->fully_assoc)) - { + if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { filter_tag_arr(t_min, tag_arr); } - //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; - - - if (pure_ram||pure_cam||g_ip->fully_assoc) - { - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); //essentially adds value to sol_list, with no extra memory copying. - curr_org.tag_array2 = NULL; + // cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; + + if (pure_ram || pure_cam || g_ip->fully_assoc) { + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t &curr_org = + sol_list.back(); // essentially adds value to sol_list, with no extra + // memory copying. 
+ curr_org.tag_array2 = NULL; curr_org.data_array2 = (*miter); curr_org.find_delay(); @@ -873,32 +857,32 @@ void solve(uca_org_t *fin_res) curr_org.find_area(); curr_org.find_cyc(); - //update min values for the entire cache + // update min values for the entire cache cache_min->update_min_values(curr_org); - sol_list.push_back(uca_org_t());//add a new node to the back + sol_list.push_back(uca_org_t()); // add a new node to the back } - } - else - { - while (tag_arr.empty() != true) - { - mem_array * arr_temp = (tag_arr.back()); - //delete tag_arr.back(); - tag_arr.pop_back();//this causes double free problem if uca_org_t has a destructor to release all contained pointers---when called by sol_list.clear(); so uca_org_t does not use destructor to delete contained pointers - - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); - curr_org.tag_array2 = arr_temp; - curr_org.data_array2 = (*miter); //try all combinations of tag and data array + } else { + while (tag_arr.empty() != true) { + mem_array *arr_temp = (tag_arr.back()); + // delete tag_arr.back(); + tag_arr.pop_back(); // this causes double free problem if uca_org_t has a + // destructor to release all contained pointers---when + // called by sol_list.clear(); so uca_org_t does not + // use destructor to delete contained pointers + + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t &curr_org = sol_list.back(); + curr_org.tag_array2 = arr_temp; + curr_org.data_array2 = + (*miter); // try all combinations of tag and data array curr_org.find_delay(); curr_org.find_energy(); curr_org.find_area(); curr_org.find_cyc(); - //update min values for the entire cache + // update min values for the entire cache cache_min->update_min_values(curr_org); sol_list.push_back(uca_org_t()); @@ -906,184 +890,200 @@ void solve(uca_org_t *fin_res) } } - sol_list.pop_back();//delete the last unused node added in the loop above + 
sol_list.pop_back(); // delete the last unused node added in the loop above find_optimal_uca(fin_res, cache_min, sol_list); sol_list.clear(); - for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) - { - if (*miter != fin_res->data_array2) - { + for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) { + if (*miter != fin_res->data_array2) { delete *miter; } } data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { + for (uint32_t t = 0; t < nthreads; t++) { delete calc_array[t].data_res; delete calc_array[t].tag_res; } - delete [] calc_array; + delete[] calc_array; delete cache_min; delete d_min; delete t_min; } -void update_dvs(uca_org_t *fin_res) -{ - if(fin_res->tag_array2 || fin_res->data_array2) - { -// Wire::print_wire(); - Wire winit;//init before changing dvs -// fin_res->uca_q = vector(g_ip->dvs_voltage.size()); - for (unsigned int i=0; i< g_ip->dvs_voltage.size(); i++) - { - - fin_res->uca_q.push_back(new uca_org_t()); - - g_ip->hp_Vdd = g_ip->dvs_voltage[i]; - g_ip->specific_hp_vdd = true; - g_ip->lstp_Vdd = g_ip->dvs_voltage[i]; - g_ip->specific_lstp_vdd = true; - g_ip->lop_Vdd = g_ip->dvs_voltage[i]; - g_ip->specific_lop_vdd = true; -// g_ip->power_gating = false; -// g_ip->bitline_floating = false; -// g_ip->wl_power_gated = false; -// g_ip->interconect_power_gated = false; -// g_ip->cl_power_gated = false; -// g_ip->array_power_gated = false; - - init_tech_params(g_ip->F_sz_um,true); - winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); -// Wire::print_wire(); - - if(fin_res->tag_array2) - { - DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(tag_arr_dyn_p.is_valid) - { - - UCA * tag_arr = new UCA(tag_arr_dyn_p); - fin_res->uca_q[i]->tag_array2 = new mem_array(); - - 
collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, fin_res->uca_q[i]->tag_array2, g_ip->is_main_mem); - delete tag_arr; - } - - } - DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(data_arr_dyn_p.is_valid) - { - UCA * data_arr = new UCA(data_arr_dyn_p); - fin_res->uca_q[i]->data_array2 = new mem_array(); - collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_q[i]->data_array2, g_ip->is_main_mem); - delete data_arr; - } - - fin_res->uca_q[i]->find_delay(); - fin_res->uca_q[i]->find_energy(); - fin_res->uca_q[i]->find_area(); - fin_res->uca_q[i]->find_cyc(); - -// output_UCA(fin_res->uca_q[i]); -// Wire::print_wire(); - } - //reset input to original values in *.cfg file - g_ip->specific_hp_vdd = false; - g_ip->specific_lstp_vdd = false; - g_ip->specific_lop_vdd = false; - init_tech_params(g_ip->F_sz_um,true); - } - else - { - cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl; - exit(1); - } -} +void update_dvs(uca_org_t *fin_res) { + if (fin_res->tag_array2 || fin_res->data_array2) { + // Wire::print_wire(); + Wire winit; // init before changing dvs + // fin_res->uca_q = vector(g_ip->dvs_voltage.size()); + for (unsigned int i = 0; i < g_ip->dvs_voltage.size(); i++) { + + fin_res->uca_q.push_back(new uca_org_t()); + + g_ip->hp_Vdd = g_ip->dvs_voltage[i]; + g_ip->specific_hp_vdd = true; + g_ip->lstp_Vdd = g_ip->dvs_voltage[i]; + g_ip->specific_lstp_vdd = true; + 
g_ip->lop_Vdd = g_ip->dvs_voltage[i]; + g_ip->specific_lop_vdd = true; + // g_ip->power_gating = false; + // g_ip->bitline_floating = false; + // g_ip->wl_power_gated = false; + // g_ip->interconect_power_gated = false; + // g_ip->cl_power_gated = false; + // g_ip->array_power_gated = false; + + init_tech_params(g_ip->F_sz_um, true); + winit.wire_dvs_update(); // Wire::wire_dvs_update();//Wire winit (1,1, + // false); + // Wire::print_wire(); + + if (fin_res->tag_array2) { + DynamicParameter tag_arr_dyn_p( + true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, + fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, + g_ip->is_main_mem); + if (tag_arr_dyn_p.is_valid) { + + UCA *tag_arr = new UCA(tag_arr_dyn_p); + fin_res->uca_q[i]->tag_array2 = new mem_array(); + + collect_uca_results( + fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, tag_arr, + fin_res->uca_q[i]->tag_array2, g_ip->is_main_mem); + delete tag_arr; + } + } + DynamicParameter data_arr_dyn_p( + false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, + g_ip->is_main_mem); + if (data_arr_dyn_p.is_valid) { + UCA *data_arr = new UCA(data_arr_dyn_p); + fin_res->uca_q[i]->data_array2 = new mem_array(); + collect_uca_results( + fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, data_arr, + fin_res->uca_q[i]->data_array2, g_ip->is_main_mem); + delete data_arr; + } -void update_pg(uca_org_t *fin_res) -{ + 
fin_res->uca_q[i]->find_delay(); + fin_res->uca_q[i]->find_energy(); + fin_res->uca_q[i]->find_area(); + fin_res->uca_q[i]->find_cyc(); + + // output_UCA(fin_res->uca_q[i]); + // Wire::print_wire(); + } + // reset input to original values in *.cfg file + g_ip->specific_hp_vdd = false; + g_ip->specific_lstp_vdd = false; + g_ip->specific_lop_vdd = false; + init_tech_params(g_ip->F_sz_um, true); + } else { + cout << "ERROR: Cannot retrieve array structure for tag and data array" + << endl; + exit(1); + } +} - if(fin_res->tag_array2 || fin_res->data_array2) - { - Wire winit; - fin_res->uca_pg_reference = new uca_org_t(); - /* - if (i == 0) {g_ip->hp_Vdd = 0.8; } - else g_ip->hp_Vdd = 1.1; - g_ip->specific_hp_vdd = true; - cout<<"VDD=====" << g_ip->hp_Vdd <F_sz_um,true); - winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); - Wire::print_wire(); - */ - g_ip->array_power_gated = false; - g_ip->bitline_floating = false; - g_ip->wl_power_gated = false; - g_ip->cl_power_gated = false; - g_ip->interconect_power_gated = false; - g_ip->power_gating = false; -// winit.wire_dvs_update(); -// Wire::print_wire(); -// init_tech_params(g_ip->F_sz_um,true); -// winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false); -// Wire::print_wire(); - if(fin_res->tag_array2) - { - // init_tech_params(g_ip->F_sz_um,true); - DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(tag_arr_dyn_p.is_valid) - { - - UCA * tag_arr = new UCA(tag_arr_dyn_p); - fin_res->uca_pg_reference->tag_array2 = new mem_array(); - - collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, 
fin_res->uca_pg_reference->tag_array2, g_ip->is_main_mem); - delete tag_arr; - - } - - } - // init_tech_params(g_ip->F_sz_um,false); - DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(data_arr_dyn_p.is_valid) - { - UCA * data_arr = new UCA(data_arr_dyn_p); - fin_res->uca_pg_reference->data_array2 = new mem_array(); - collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_pg_reference->data_array2, g_ip->is_main_mem); - delete data_arr; - } - - fin_res->uca_pg_reference->find_delay(); - fin_res->uca_pg_reference->find_energy(); - fin_res->uca_pg_reference->find_area(); - fin_res->uca_pg_reference->find_cyc(); - -// output_UCA(fin_res->uca_pg_reference); -// Wire::print_wire(); - } - else - { - cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl; - exit(1); - } - //reset input to original values in *.cfg file - g_ip->array_power_gated = true; - g_ip->bitline_floating = true; - g_ip->wl_power_gated = true; - g_ip->cl_power_gated = true; - g_ip->interconect_power_gated = true; - g_ip->power_gating = true; +void update_pg(uca_org_t *fin_res) { + + if (fin_res->tag_array2 || fin_res->data_array2) { + Wire winit; + fin_res->uca_pg_reference = new uca_org_t(); + /* + if (i == 0) {g_ip->hp_Vdd = 0.8; } + else g_ip->hp_Vdd = 1.1; + g_ip->specific_hp_vdd = true; + cout<<"VDD=====" << g_ip->hp_Vdd <F_sz_um,true); + winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, + false); Wire::print_wire(); + */ + g_ip->array_power_gated = false; + g_ip->bitline_floating = false; + g_ip->wl_power_gated = false; + g_ip->cl_power_gated = 
false; + g_ip->interconect_power_gated = false; + g_ip->power_gating = false; + // winit.wire_dvs_update(); + // Wire::print_wire(); + // init_tech_params(g_ip->F_sz_um,true); + // winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit + //(1,1, false); Wire::print_wire(); + if (fin_res->tag_array2) { + // init_tech_params(g_ip->F_sz_um,true); + DynamicParameter tag_arr_dyn_p( + true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, + fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); + if (tag_arr_dyn_p.is_valid) { + + UCA *tag_arr = new UCA(tag_arr_dyn_p); + fin_res->uca_pg_reference->tag_array2 = new mem_array(); + + collect_uca_results( + fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, + tag_arr, fin_res->uca_pg_reference->tag_array2, g_ip->is_main_mem); + delete tag_arr; + } + } + // init_tech_params(g_ip->F_sz_um,false); + DynamicParameter data_arr_dyn_p( + false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); + if (data_arr_dyn_p.is_valid) { + UCA *data_arr = new UCA(data_arr_dyn_p); + fin_res->uca_pg_reference->data_array2 = new mem_array(); + collect_uca_results( + fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, + data_arr, fin_res->uca_pg_reference->data_array2, g_ip->is_main_mem); + delete data_arr; + } + fin_res->uca_pg_reference->find_delay(); + fin_res->uca_pg_reference->find_energy(); + 
fin_res->uca_pg_reference->find_area(); + fin_res->uca_pg_reference->find_cyc(); + // output_UCA(fin_res->uca_pg_reference); + // Wire::print_wire(); + } else { + cout << "ERROR: Cannot retrieve array structure for tag and data array" + << endl; + exit(1); + } + // reset input to original values in *.cfg file + g_ip->array_power_gated = true; + g_ip->bitline_floating = true; + g_ip->wl_power_gated = true; + g_ip->cl_power_gated = true; + g_ip->interconect_power_gated = true; + g_ip->power_gating = true; } /* update for thermal @@ -1092,29 +1092,34 @@ void update(uca_org_t *fin_res) if(fin_res->tag_array2) { init_tech_params(g_ip->F_sz_um,true); - DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(tag_arr_dyn_p.is_valid) + DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, +fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, +fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, +fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); fin_res->tag_array2->power = tag_arr->power; } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; - exit(1); + cout << "ERROR: Cannot retrieve array structure for leakage feedback" << +endl; exit(1); } } init_tech_params(g_ip->F_sz_um,false); - DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); - if(data_arr_dyn_p.is_valid) + DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, +fin_res->data_array2->Nspd, 
fin_res->data_array2->Ndwl, +fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, +fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, +g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); fin_res->data_array2->power = data_arr->power; } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; - exit(1); + cout << "ERROR: Cannot retrieve array structure for leakage feedback" << +endl; exit(1); } fin_res->find_energy(); diff --git a/cacti/Ucache.h b/cacti/Ucache.h index ccd65ee..216bbe6 100644 --- a/cacti/Ucache.h +++ b/cacti/Ucache.h @@ -29,40 +29,37 @@ * ***************************************************************************/ - #ifndef __UCACHE_H__ #define __UCACHE_H__ -#include #include "area.h" -#include "router.h" #include "nuca.h" +#include "router.h" #include "uca.h" +#include -class min_values_t -{ - public: - double min_delay; - double min_dyn; - double min_leakage; - double min_area; - double min_cyc; - - min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { } - - void update_min_values(const min_values_t * val); - void update_min_values(const uca_org_t & res); - void update_min_values(const nuca_org_t * res); - void update_min_values(const mem_array * res); +class min_values_t { +public: + double min_delay; + double min_dyn; + double min_leakage; + double min_area; + double min_cyc; + + min_values_t() + : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), + min_area(BIGNUM), min_cyc(BIGNUM) {} + + void update_min_values(const min_values_t *val); + void update_min_values(const uca_org_t &res); + void update_min_values(const nuca_org_t *res); + void update_min_values(const mem_array *res); }; - - -struct solution -{ - int tag_array_index; - int data_array_index; +struct solution { + int tag_array_index; + int data_array_index; list::iterator tag_array_iter; list::iterator data_array_iter; double 
access_time; @@ -72,39 +69,23 @@ struct solution powerDef total_power; }; - - -bool calculate_time( - bool is_tag, - int pure_ram, - bool pure_cam, - double Nspd, - unsigned int Ndwl, - unsigned int Ndbl, - unsigned int Ndcm, - unsigned int Ndsam_lev_1, - unsigned int Ndsam_lev_2, - mem_array *ptr_array, - int flag_results_populate, - results_mem_array *ptr_results, - uca_org_t *ptr_fin_res, - bool is_main_mem); +bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, + unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, + unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, + mem_array *ptr_array, int flag_results_populate, + results_mem_array *ptr_results, uca_org_t *ptr_fin_res, + bool is_main_mem); void collect_uca_results( -// bool is_tag, -// int pure_ram, -// bool pure_cam, - double Nspd, - unsigned int Ndwl, - unsigned int Ndbl, - unsigned int Ndcm, - unsigned int Ndsam_lev_1, - unsigned int Ndsam_lev_2, - UCA const * const uca, - mem_array * const ptr_array, -// int flag_results_populate, -// results_mem_array *ptr_results, -// uca_org_t *ptr_fin_res, + // bool is_tag, + // int pure_ram, + // bool pure_cam, + double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, + unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, UCA const *const uca, + mem_array *const ptr_array, + // int flag_results_populate, + // results_mem_array *ptr_results, + // uca_org_t *ptr_fin_res, bool is_main_mem); void update_dvs(uca_org_t *fin_res); @@ -113,23 +94,21 @@ void update_pg(uca_org_t *fin_res); void solve(uca_org_t *fin_res); void init_tech_params(double tech, bool is_tag); - -struct calc_time_mt_wrapper_struct -{ +struct calc_time_mt_wrapper_struct { uint32_t tid; - bool is_tag; - bool pure_ram; - bool pure_cam; - bool is_main_mem; - double Nspd_min; + bool is_tag; + bool pure_ram; + bool pure_cam; + bool is_main_mem; + double Nspd_min; - min_values_t * data_res; - min_values_t * tag_res; + min_values_t *data_res; + min_values_t *tag_res; 
list data_arr; list tag_arr; }; -void *calc_time_mt_wrapper(void * void_obj); +void *calc_time_mt_wrapper(void *void_obj); #endif diff --git a/cacti/arbiter.cc b/cacti/arbiter.cc index 6664abf..1b0ff93 100644 --- a/cacti/arbiter.cc +++ b/cacti/arbiter.cc @@ -31,100 +31,100 @@ #include "arbiter.h" -Arbiter::Arbiter( - double n_req, - double flit_size_, - double output_len, - TechnologyParameter::DeviceType *dt - ):R(n_req), flit_size(flit_size_), - o_len (output_len), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; +Arbiter::Arbiter(double n_req, double flit_size_, double output_len, + TechnologyParameter::DeviceType *dt) + : R(n_req), flit_size(flit_size_), o_len(output_len), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; Vdd = dt->Vdd; double technology = g_ip->F_sz_um; - NTn1 = 13.5*technology/2; - PTn1 = 76*technology/2; - NTn2 = 13.5*technology/2; - PTn2 = 76*technology/2; - NTi = 12.5*technology/2; - PTi = 25*technology/2; - NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology/2; /* pmos tr. length*/ + NTn1 = 13.5 * technology / 2; + PTn1 = 76 * technology / 2; + NTn2 = 13.5 * technology / 2; + PTn2 = 76 * technology / 2; + NTi = 12.5 * technology / 2; + PTi = 25 * technology / 2; + NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology / 2; /* pmos tr. 
length*/ } -Arbiter::~Arbiter(){} +Arbiter::~Arbiter() {} -double -Arbiter::arb_req() { - double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) + - gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); +double Arbiter::arb_req() { + double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + + 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0) + gate_C(NTi, 0) + + gate_C(PTi, 0) + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); return temp; } -double -Arbiter::arb_pri() { - double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance - of flip-flop is ignored */ +double Arbiter::arb_pri() { + double temp = + 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)); /* switching capacitance + of flip-flop is ignored */ return temp; } - -double -Arbiter::arb_grant() { - double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); +double Arbiter::arb_grant() { + double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); return temp; } -double -Arbiter::arb_int() { - double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + - 2*gate_C(NTn2, 0) + gate_C(PTn2, 0)); +double Arbiter::arb_int() { + double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0)); return temp; } -void -Arbiter::compute_power() { - power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 + - arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd); - double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, 
min_w_pmos * PTi, 1, inv); - double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); - power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage - power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd; +void Arbiter::compute_power() { + power.readOp.dynamic = + (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() * Vdd * Vdd / 2 + + arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd * Vdd); + double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak = + cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi, min_w_pmos * PTi, 1, inv); + double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak_gate = + cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi, min_w_pmos * PTi, 1, inv); + power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * + Vdd; // FIXME include priority table leakage + power.readOp.gate_leakage = + nor1_leak_gate * Vdd + nor2_leak_gate * Vdd + not_leak_gate * Vdd; } -double //wire cap with triple spacing +double // wire cap with triple spacing Arbiter::Cw3(double length) { Wire wc(g_ip->wt, length, 1, 3, 3); - double temp = (wc.wire_cap(length,true)); + double temp = (wc.wire_cap(length, true)); return temp; } -double -Arbiter::crossbar_ctrline() { - double temp = (Cw3(o_len * 1e-6 /* m */) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + - gate_C(NTi, 0) + gate_C(PTi, 0)); +double Arbiter::crossbar_ctrline() { + double 
temp = + (Cw3(o_len * 1e-6 /* m */) + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + gate_C(NTi, 0) + + gate_C(PTi, 0)); return temp; } -double -Arbiter::transmission_buf_ctrcap() { - double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0); +double Arbiter::transmission_buf_ctrcap() { + double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0); return temp; } - -void Arbiter::print_arbiter() -{ - cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; +void Arbiter::print_arbiter() { + cout << "\nArbiter Stats (" << R << " input arbiter" + << ")\n\n"; cout << "Flit size : " << flit_size << " bits" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic * 1e9 << " (nJ)" + << endl; + cout << "Leakage Power : " << power.readOp.leakage * 1e3 << " (mW)" + << endl; } - - diff --git a/cacti/arbiter.h b/cacti/arbiter.h index 6e2bcb6..c924a95 100644 --- a/cacti/arbiter.h +++ b/cacti/arbiter.h @@ -32,46 +32,40 @@ #ifndef __ARBITER__ #define __ARBITER__ -#include -#include #include "basic_circuit.h" #include "cacti_interface.h" #include "component.h" -#include "parameter.h" #include "mat.h" +#include "parameter.h" #include "wire.h" -class Arbiter : public Component -{ - public: - Arbiter( - double Req, - double flit_sz, - double output_len, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); - ~Arbiter(); - - void print_arbiter(); - double arb_req(); - double arb_pri(); - double arb_grant(); - double arb_int(); - void compute_power(); - double Cw3(double len); - double crossbar_ctrline(); - double transmission_buf_ctrcap(); - +#include +#include +class Arbiter : public Component { +public: + Arbiter(double Req, double flit_sz, double output_len, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); + ~Arbiter(); - private: - double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi; - 
double flit_size; - double NTtr, PTtr; - double o_len; - TechnologyParameter::DeviceType *deviceType; - double TriS1, TriS2; - double min_w_pmos, Vdd; + void print_arbiter(); + double arb_req(); + double arb_pri(); + double arb_grant(); + double arb_int(); + void compute_power(); + double Cw3(double len); + double crossbar_ctrline(); + double transmission_buf_ctrcap(); +private: + double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi; + double flit_size; + double NTtr, PTtr; + double o_len; + TechnologyParameter::DeviceType *deviceType; + double TriS1, TriS2; + double min_w_pmos, Vdd; }; #endif diff --git a/cacti/area.cc b/cacti/area.cc index 0d8d4b7..f182ecb 100644 --- a/cacti/area.cc +++ b/cacti/area.cc @@ -29,18 +29,15 @@ * ***************************************************************************/ - - #include "area.h" + +#include "basic_circuit.h" #include "component.h" #include "decoder.h" #include "parameter.h" -#include "basic_circuit.h" + +#include #include #include -#include using namespace std; - - - diff --git a/cacti/area.h b/cacti/area.h index 92272f0..7c1080d 100644 --- a/cacti/area.h +++ b/cacti/area.h @@ -29,43 +29,35 @@ * ***************************************************************************/ - - #ifndef __AREA_H__ #define __AREA_H__ -#include "cacti_interface.h" #include "basic_circuit.h" +#include "cacti_interface.h" using namespace std; -class Area -{ - public: +class Area { +public: double w; double h; - Area():w(0), h(0), area(0) { } + Area() : w(0), h(0), area(0) {} double get_w() const { return w; } double get_h() const { return h; } - double get_area() const - { - if (w == 0 && h == 0) - { + double get_area() const { + if (w == 0 && h == 0) { return area; - } - else - { - return w*h; + } else { + return w * h; } } void set_w(double w_) { w = w_; } void set_h(double h_) { h = h_; } void set_area(double a_) { area = a_; } - private: +private: double area; }; #endif - diff --git a/cacti/bank.cc b/cacti/bank.cc old mode 100755 new mode 100644 index 
74b2c6d..7957649 --- a/cacti/bank.cc +++ b/cacti/bank.cc @@ -29,89 +29,94 @@ * ***************************************************************************/ - - #include "bank.h" -#include +#include -Bank::Bank(const DynamicParameter & dyn_p): - dp(dyn_p), mat(dp), - num_addr_b_mat(dyn_p.number_addr_bits_mat), - num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir), - array_leakage(0), - wl_leakage(0), - cl_leakage(0) -{ +Bank::Bank(const DynamicParameter &dyn_p) + : dp(dyn_p), mat(dp), num_addr_b_mat(dyn_p.number_addr_bits_mat), + num_mats_hor_dir(dyn_p.num_mats_h_dir), + num_mats_ver_dir(dyn_p.num_mats_v_dir), array_leakage(0), wl_leakage(0), + cl_leakage(0) { int RWP; int ERP; int EWP; int SCHP; - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; SCHP = g_ip->num_search_ports; } - int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + int total_addrbits = + (dp.number_addr_bits_mat + dp.number_subbanks_decode) * (RWP + ERP + EWP); + int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); int searchinbits; int searchoutbits; - if (dp.fully_assoc || dp.pure_cam) - { - datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); - searchinbits = dp.num_si_b_bank_per_port * SCHP; - searchoutbits = dp.num_so_b_bank_per_port * SCHP; + if (dp.fully_assoc || dp.pure_cam) { + datainbits = 
dp.num_di_b_bank_per_port * (RWP + EWP); + dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + searchinbits = dp.num_si_b_bank_per_port * SCHP; + searchoutbits = dp.num_so_b_bank_per_port * SCHP; } - if (!(dp.fully_assoc || dp.pure_cam)) - { - if (g_ip->fast_access && dp.is_tag == false) - { - dataoutbits *= g_ip->data_assoc; + if (!(dp.fully_assoc || dp.pure_cam)) { + if (g_ip->fast_access && dp.is_tag == false) { + dataoutbits *= g_ip->data_assoc; } - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - -// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, -// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - } - else - { - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, 
datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true); - htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true); - - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; + htree_in_add = + new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, 0, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Add_htree); + htree_in_data = + new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, 0, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_in_htree); + htree_out_data = + new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, 0, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_out_htree); + + // htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, + // total_addrbits, datainbits, 0,dataoutbits,0, + // num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } else { + htree_in_add = new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, + dataoutbits, searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Add_htree); + htree_in_data = new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, + dataoutbits, searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_in_htree); + htree_out_data = new Htree2( + g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, + datainbits, searchinbits, dataoutbits, searchoutbits, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_out_htree); + htree_in_search = new Htree2( + g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, + datainbits, 
searchinbits, dataoutbits, searchoutbits, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_in_htree, true, true); + htree_out_search = new Htree2( + g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, + datainbits, searchinbits, dataoutbits, searchoutbits, + num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_out_htree, true); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; } num_addr_b_row_dec = _log2(mat.subarray.num_rows); @@ -119,93 +124,82 @@ Bank::Bank(const DynamicParameter & dyn_p): num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec; } - - -Bank::~Bank() -{ +Bank::~Bank() { delete htree_in_add; delete htree_out_data; delete htree_in_data; - if (dp.fully_assoc || dp.pure_cam) - { - delete htree_in_search; - delete htree_out_search; + if (dp.fully_assoc || dp.pure_cam) { + delete htree_in_search; + delete htree_out_search; } } - - -double Bank::compute_delays(double inrisetime) -{ +double Bank::compute_delays(double inrisetime) { return mat.compute_delays(inrisetime); } - - -void Bank::compute_power_energy() -{ +void Bank::compute_power_energy() { mat.compute_power_energy(); - if (!(dp.fully_assoc || dp.pure_cam)) - { - power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - power.readOp.power_gated_leakage += mat.power.readOp.power_gated_leakage * dp.num_mats; - - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - - array_leakage += mat.array_leakage*dp.num_mats; - wl_leakage += mat.wl_leakage*dp.num_mats; - cl_leakage += mat.cl_leakage*dp.num_mats; - - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - - 
power.readOp.power_gated_leakage += htree_in_add->power.readOp.power_gated_leakage; - power.readOp.power_gated_leakage += htree_in_data->power.readOp.power_gated_leakage; - power.readOp.power_gated_leakage += htree_out_data->power.readOp.power_gated_leakage; - - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + if (!(dp.fully_assoc || dp.pure_cam)) { + power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + power.readOp.power_gated_leakage += + mat.power.readOp.power_gated_leakage * dp.num_mats; + + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + + array_leakage += mat.array_leakage * dp.num_mats; + wl_leakage += mat.wl_leakage * dp.num_mats; + cl_leakage += mat.cl_leakage * dp.num_mats; + + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + + power.readOp.power_gated_leakage += + htree_in_add->power.readOp.power_gated_leakage; + power.readOp.power_gated_leakage += + htree_in_data->power.readOp.power_gated_leakage; + power.readOp.power_gated_leakage += + htree_out_data->power.readOp.power_gated_leakage; + + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + } else { + + power.readOp.dynamic += + mat.power.readOp + .dynamic; // for fa and cam num_act_mats_hor_dir is 1 for plain r/w + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + 
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + + power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; + power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + + mat.power_sa.searchOp.dynamic + + mat.power_bitline.searchOp.dynamic + + mat.power_subarray_out_drv.searchOp.dynamic + + mat.ml_to_ram_wl_drv->power.readOp.dynamic; + + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + + power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; + + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.leakage += htree_in_search->power.readOp.leakage; + power.readOp.leakage += htree_out_search->power.readOp.leakage; + + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; } - else - { - - power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - - power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; - power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + - mat.power_sa.searchOp.dynamic + - mat.power_bitline.searchOp.dynamic + - mat.power_subarray_out_drv.searchOp.dynamic+ - mat.ml_to_ram_wl_drv->power.readOp.dynamic; - - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - 
power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - - power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; - power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; - - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - power.readOp.leakage += htree_in_search->power.readOp.leakage; - power.readOp.leakage += htree_out_search->power.readOp.leakage; - - - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; - - } - } - diff --git a/cacti/bank.h b/cacti/bank.h old mode 100755 new mode 100644 index 79c847f..0f7cfae --- a/cacti/bank.h +++ b/cacti/bank.h @@ -29,46 +29,40 @@ * ***************************************************************************/ - - #ifndef __BANK_H__ #define __BANK_H__ #include "component.h" #include "decoder.h" -#include "mat.h" #include "htree2.h" +#include "mat.h" +class Bank : public Component { +public: + Bank(const DynamicParameter &dyn_p); + ~Bank(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_energy(); -class Bank : public Component -{ - public: - Bank(const DynamicParameter & dyn_p); - ~Bank(); - double compute_delays(double inrisetime); // return outrisetime - void compute_power_energy(); - - const DynamicParameter & dp; - Mat mat; - Htree2 *htree_in_add; - Htree2 *htree_in_data; - Htree2 *htree_out_data; - Htree2 *htree_in_search; - Htree2 *htree_out_search; + const DynamicParameter &dp; + Mat mat; + Htree2 *htree_in_add; + Htree2 *htree_in_data; + Htree2 *htree_out_data; + Htree2 *htree_in_search; + Htree2 
*htree_out_search; - int num_addr_b_mat; - int num_mats_hor_dir; - int num_mats_ver_dir; + int num_addr_b_mat; + int num_mats_hor_dir; + int num_mats_ver_dir; - int num_addr_b_row_dec; - int num_addr_b_routed_to_mat_for_act; - int num_addr_b_routed_to_mat_for_rd_or_wr; + int num_addr_b_row_dec; + int num_addr_b_routed_to_mat_for_act; + int num_addr_b_routed_to_mat_for_rd_or_wr; - double array_leakage; - double wl_leakage; - double cl_leakage; + double array_leakage; + double wl_leakage; + double cl_leakage; }; - - #endif diff --git a/cacti/basic_circuit.cc b/cacti/basic_circuit.cc index b81f8bf..d1fb9e5 100644 --- a/cacti/basic_circuit.cc +++ b/cacti/basic_circuit.cc @@ -29,27 +29,23 @@ * ***************************************************************************/ - - - #include "basic_circuit.h" + #include "parameter.h" -#include + #include #include +#include -uint32_t _log2(uint64_t num) -{ +uint32_t _log2(uint64_t num) { uint32_t log2 = 0; - if (num == 0) - { + if (num == 0) { std::cerr << "log0?" 
<< std::endl; exit(1); } - while (num > 1) - { + while (num > 1) { num = (num >> 1); log2++; } @@ -57,26 +53,17 @@ uint32_t _log2(uint64_t num) return log2; } - -bool is_pow2(int64_t val) -{ - if (val <= 0) - { +bool is_pow2(int64_t val) { + if (val <= 0) { return false; - } - else if (val == 1) - { + } else if (val == 1) { return true; - } - else - { - return (_log2(val) != _log2(val-1)); + } else { + return (_log2(val) != _log2(val - 1)); } } - -int powers (int base, int n) -{ +int powers(int base, int n) { int i, p; p = 1; @@ -87,211 +74,151 @@ int powers (int base, int n) /*----------------------------------------------------------------------*/ -double logtwo (double x) -{ +double logtwo(double x) { assert(x > 0); - return ((double) (log (x) / log (2.0))); + return ((double)(log(x) / log(2.0))); } /*----------------------------------------------------------------------*/ - -double gate_C( - double width, - double wirelength, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - const TechnologyParameter::DeviceType * dt; - - if (_is_dram && _is_cell) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if (_is_dram && _is_wl_tr) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if (!_is_dram && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { +double gate_C(double width, double wirelength, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + const TechnologyParameter::DeviceType *dt; + + if (_is_dram && _is_cell) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if (_is_dram && _is_wl_tr) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if (!_is_dram && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { dt = &g_tp.peri_global; } - return (dt->C_g_ideal + 
dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + return (dt->C_g_ideal + dt->C_overlap + 3 * dt->C_fringe) * width + + dt->l_phy * Cpolywire; } - // returns gate capacitance in Farads // actually this function is the same as gate_C() now -double gate_C_pass( - double width, // gate width in um (length is Lphy_periph_global) - double wirelength, // poly wire length going to gate in lambda - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ +double +gate_C_pass(double width, // gate width in um (length is Lphy_periph_global) + double wirelength, // poly wire length going to gate in lambda + bool _is_dram, bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) { // v5.0 - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { + const TechnologyParameter::DeviceType *dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { dt = &g_tp.peri_global; } - return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + return (dt->C_g_ideal + dt->C_overlap + 3 * dt->C_fringe) * width + + dt->l_phy * Cpolywire; } - - -double drain_C_( - double width, - int nchannel, - int stack, - int next_arg_thresh_folding_width_or_height_cell, - double fold_dimension, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ +double drain_C_(double width, 
int nchannel, int stack, + int next_arg_thresh_folding_width_or_height_cell, + double fold_dimension, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { double w_folded_tr; - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; // DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; // DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { + const TechnologyParameter::DeviceType *dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { dt = &g_tp.peri_global; } double c_junc_area = dt->C_junc; double c_junc_sidewall = dt->C_junc_sidewall; - double c_fringe = 2*dt->C_fringe; - double c_overlap = 2*dt->C_overlap; + double c_fringe = 2 * dt->C_fringe; + double c_overlap = 2 * dt->C_overlap; double drain_C_metal_connecting_folded_tr = 0; - // determine the width of the transistor after folding (if it is getting folded) - if (next_arg_thresh_folding_width_or_height_cell == 0) - { // interpret fold_dimension as the the folding width threshold + // determine the width of the transistor after folding (if it is getting + // folded) + if (next_arg_thresh_folding_width_or_height_cell == + 0) { // interpret fold_dimension as the the folding width threshold // i.e. the value of transistor width above which the transistor gets folded w_folded_tr = fold_dimension; - } - else - { // interpret fold_dimension as the height of the cell that this transistor is part of. 
- double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; + } else { // interpret fold_dimension as the height of the cell that this + // transistor is part of. + double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; // TODO : w_folded_tr must come from Component::compute_gate_area() double ratio_p_to_n = 2.0 / (2.0 + 1.0); - if (nchannel) - { - w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); - } - else - { - w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + if (nchannel) { + w_folded_tr = + (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } else { + w_folded_tr = + ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); } } - int num_folded_tr = (int) (ceil(width / w_folded_tr)); + int num_folded_tr = (int)(ceil(width / w_folded_tr)); - if (num_folded_tr < 2) - { + if (num_folded_tr < 2) { w_folded_tr = width; } - double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain + double total_drain_w = (g_tp.w_poly_contact + + 2 * g_tp.spacing_poly_to_contact) + // only for drain (stack - 1) * g_tp.spacing_poly_to_poly; double drain_h_for_sidewall = w_folded_tr; - double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); - if (num_folded_tr > 1) - { - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + - (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); - - if (num_folded_tr%2 == 0) - { + double total_drain_height_for_cap_wrt_gate = + w_folded_tr + 2 * w_folded_tr * (stack - 1); + if (num_folded_tr > 1) { + total_drain_w += + (num_folded_tr - 2) * + (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); + + if (num_folded_tr % 2 == 0) { drain_h_for_sidewall = 0; } total_drain_height_for_cap_wrt_gate *= num_folded_tr; - drain_C_metal_connecting_folded_tr = 
g_tp.wire_local.C_per_um * total_drain_w; + drain_C_metal_connecting_folded_tr = + g_tp.wire_local.C_per_um * total_drain_w; } - double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; - double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); - double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; + double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; + double drain_C_sidewall = + c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); + double drain_C_wrt_gate = + (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; - return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); + return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + + drain_C_metal_connecting_folded_tr); } - -double tr_R_on( - double width, - int nchannel, - int stack, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { +double tr_R_on(double width, int nchannel, int stack, bool _is_dram, + bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) { + const TechnologyParameter::DeviceType *dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { dt = &g_tp.peri_global; } @@ -299,40 +226,24 @@ double tr_R_on( return (stack * 
restrans / width); } - /* This routine operates in reverse: given a resistance, it finds * the transistor width that would have this R. It is used in the * data wordline to estimate the wordline driver size. */ // returns width in um -double R_to_w( - double res, - int nchannel, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && (_is_cell)) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { +double R_to_w(double res, int nchannel, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + const TechnologyParameter::DeviceType *dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && (_is_cell)) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { dt = &g_tp.peri_global; } @@ -340,603 +251,516 @@ double R_to_w( return (restrans / res); } - -double pmos_to_nmos_sz_ratio( - bool _is_dram, - bool _is_wl_tr, - bool _is_sleep_tx) -{ +double pmos_to_nmos_sz_ratio(bool _is_dram, bool _is_wl_tr, bool _is_sleep_tx) { double p_to_n_sizing_ratio; - if ((_is_dram) && (_is_wl_tr)) - { //DRAM wordline transistor + if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; - } - else if (_is_sleep_tx) - { - p_to_n_sizing_ratio = g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + 
p_to_n_sizing_ratio = + g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio; // Sleep transistor + } else { // DRAM or SRAM all other transistors p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; } return p_to_n_sizing_ratio; } - // "Timing Models for MOS Circuits" by Mark Horowitz, 1984 -double horowitz( - double inputramptime, // input rise time - double tf, // time constant of gate - double vs1, // threshold voltage1/Vdd - double vs2, // threshold voltage2/vdd - int rise) // whether input rises or fall +double horowitz(double inputramptime, // input rise time + double tf, // time constant of gate + double vs1, // threshold voltage1/Vdd + double vs2, // threshold voltage2/vdd + int rise) // whether input rises or fall { - if (inputramptime == 0 && vs1 == vs2) - { + if (inputramptime == 0 && vs1 == vs2) { return tf * (vs1 < 1 ? -log(vs1) : log(vs1)); } double a, b, td; a = inputramptime / tf; - if (rise == RISE) - { + if (rise == RISE) { b = 0.5; - td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2)); - } - else - { + td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) + + tf * (log(vs1) - log(vs2)); + } else { b = 0.4; - td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2)); + td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) + + tf * (log(1.0 - vs1) - log(1.0 - vs2)); } return (td); } -double cmos_Ileak( - double nWidth, - double pWidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double cmos_Ileak(double nWidth, double pWidth, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && 
(_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } - return nWidth*dt->I_off_n + pWidth*dt->I_off_p; + return nWidth * dt->I_off_n + pWidth * dt->I_off_p; } -int factorial(int n, int m) -{ - int fa = m, i; - for (i=m+1; i<=n; i++) - fa *=i; - return fa; +int factorial(int n, int m) { + int fa = m, i; + for (i = m + 1; i <= n; i++) + fa *= i; + return fa; } -int combination(int n, int m) -{ +int combination(int n, int m) { int ret; - ret = factorial(n, m+1) / factorial(n - m); + ret = factorial(n, m + 1) / factorial(n - m); return ret; } -double simplified_nmos_Isat( - double nwidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double simplified_nmos_Isat(double nwidth, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } return nwidth * dt->I_on_n; } -double simplified_pmos_Isat( - double pwidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double simplified_pmos_Isat(double pwidth, bool _is_dram, 
bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } - return pwidth * dt->I_on_n/dt->n_to_p_eff_curr_drv_ratio; + return pwidth * dt->I_on_n / dt->n_to_p_eff_curr_drv_ratio; } +double simplified_nmos_leakage(double nwidth, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; -double simplified_nmos_leakage( - double nwidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } return nwidth * dt->I_off_n; } -double simplified_pmos_leakage( - double pwidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double simplified_pmos_leakage(double pwidth, bool _is_dram, bool 
_is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } return pwidth * dt->I_off_p; } -double cmos_Ig_n( - double nWidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double cmos_Ig_n(double nWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, + bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } - return nWidth*dt->I_g_on_n; + return nWidth * dt->I_g_on_n; } -double cmos_Ig_p( - double pWidth, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx) -{ - TechnologyParameter::DeviceType * dt; +double cmos_Ig_p(double pWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, + bool _is_sleep_tx) { + TechnologyParameter::DeviceType *dt; - if ((!_is_dram)&&(_is_cell)) - { 
//SRAM cell access transistor + if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor + } else if ((_is_dram) && (_is_wl_tr)) { // DRAM wordline transistor dt = &(g_tp.dram_wl); - } - else if (_is_sleep_tx) - { - dt = &g_tp.sleep_tx; // Sleep transistor - } - else - { //DRAM or SRAM all other transistors + } else if (_is_sleep_tx) { + dt = &g_tp.sleep_tx; // Sleep transistor + } else { // DRAM or SRAM all other transistors dt = &(g_tp.peri_global); } - return pWidth*dt->I_g_on_p; + return pWidth * dt->I_g_on_p; } -double cmos_Isub_leakage( - double nWidth, - double pWidth, - int fanin, - enum Gate_type g_type, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); - double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); - double Isub=0; - int num_states; - int num_off_tx; - - num_states = int(pow(2.0, fanin)); - - switch (g_type) - { +double cmos_Isub_leakage(double nWidth, double pWidth, int fanin, + enum Gate_type g_type, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx, + enum Half_net_topology topo) { + assert(fanin >= 1); + double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, + _is_wl_tr, _is_sleep_tx); + double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, + _is_wl_tr, _is_sleep_tx); + double Isub = 0; + int num_states; + int num_off_tx; + + num_states = int(pow(2.0, fanin)); + + switch (g_type) { case nmos: - if (fanin==1) - { - Isub = nmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. 
The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; - } - - } - break; + if (fanin == 1) { + Isub = nmos_leak / num_states; + } else { + if (topo == parallel) { + Isub = nmos_leak * fanin / + num_states; // only when all tx are off, leakage power is + // non-zero. The possibility of this state is + // 1/num_states + } else { + for (num_off_tx = 1; num_off_tx <= fanin; + num_off_tx++) // when num_off_tx ==0 there is no leakage power + { + // Isub += + // nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, + // num_off_tx)*factorial(num_off_tx))); + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * + combination(fanin, num_off_tx); + } + Isub /= num_states; + } + } + break; case pmos: - if (fanin==1) - { - Isub = pmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; - } - - } - break; + if (fanin == 1) { + Isub = pmos_leak / num_states; + } else { + if (topo == parallel) { + Isub = pmos_leak * fanin / + num_states; // only when all tx are off, leakage power is + // non-zero. 
The possibility of this state is + // 1/num_states + } else { + for (num_off_tx = 1; num_off_tx <= fanin; + num_off_tx++) // when num_off_tx ==0 there is no leakage power + { + // Isub += + // pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, + // num_off_tx)*factorial(num_off_tx))); + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * + combination(fanin, num_off_tx); + } + Isub /= num_states; + } + } + break; case inv: - Isub = (nmos_leak + pmos_leak)/2; - break; + Isub = (nmos_leak + pmos_leak) / 2; + break; case nand: - Isub += fanin*pmos_leak;//the pullup network - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; - break; + Isub += fanin * pmos_leak; // the pullup network + for (num_off_tx = 1; num_off_tx <= fanin; + num_off_tx++) // the pulldown network + { + // Isub += + // nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, + // num_off_tx)*factorial(num_off_tx))); + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * + combination(fanin, num_off_tx); + } + Isub /= num_states; + break; case nor: - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub += fanin*nmos_leak;//the pulldown network - Isub /=num_states; - break; + for (num_off_tx = 1; num_off_tx <= fanin; + num_off_tx++) // the pullup network + { + // Isub += + // pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, + 
// num_off_tx)*factorial(num_off_tx))); + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * + combination(fanin, num_off_tx); + } + Isub += fanin * nmos_leak; // the pulldown network + Isub /= num_states; + break; case tri: - Isub += (nmos_leak + pmos_leak)/2;//enabled - Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power - Isub /=2; - break; + Isub += (nmos_leak + pmos_leak) / 2; // enabled + Isub += nmos_leak * + UNI_LEAK_STACK_FACTOR; // disabled upper bound of leakage power + Isub /= 2; + break; case tg: - Isub = (nmos_leak + pmos_leak)/2; - break; + Isub = (nmos_leak + pmos_leak) / 2; + break; default: - assert(0); - break; - } + assert(0); + break; + } - return Isub; + return Isub; } +double cmos_Ig_leakage(double nWidth, double pWidth, int fanin, + enum Gate_type g_type, bool _is_dram, bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx, + enum Half_net_topology topo) { + assert(fanin >= 1); + double nmos_leak = + cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double pmos_leak = + cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double Ig_on = 0; + int num_states; + int num_on_tx; + + num_states = int(pow(2.0, fanin)); + + switch (g_type) { + case nmos: + if (fanin == 1) { + Ig_on = nmos_leak / num_states; + } else { + if (topo == parallel) { + for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) { + Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx; + } + } else { + Ig_on += nmos_leak * fanin; // pull down network when all TXs are on. + // num_on_tx is the number of on tx + for (num_on_tx = 1; num_on_tx < fanin; + num_on_tx++) // when num_on_tx=[1,n-1] + { + Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / + 2; // TODO: this is a approximation now, a precise + // computation will be very complicated. 
+ } + Ig_on /= num_states; + } + } + break; + case pmos: + if (fanin == 1) { + Ig_on = pmos_leak / num_states; + } else { + if (topo == parallel) { + for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) { + Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx; + } + } else { + Ig_on += pmos_leak * fanin; // pull down network when all TXs are on. + // num_on_tx is the number of on tx + for (num_on_tx = 1; num_on_tx < fanin; + num_on_tx++) // when num_on_tx=[1,n-1] + { + Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / + 2; // TODO: this is a approximation now, a precise + // computation will be very complicated. + } + Ig_on /= num_states; + } + } + break; -double cmos_Ig_leakage( - double nWidth, - double pWidth, - int fanin, - enum Gate_type g_type, - bool _is_dram, - bool _is_cell, - bool _is_wl_tr, - bool _is_sleep_tx, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); - double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); - double Ig_on=0; - int num_states; - int num_on_tx; - - num_states = int(pow(2.0, fanin)); - - switch (g_type) - { - case nmos: - if (fanin==1) - { - Ig_on = nmos_leak/num_states; - } - else - { - if (topo==parallel) - { - for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++) - { - Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx; - } - } - else - { - Ig_on += nmos_leak * fanin;//pull down network when all TXs are on. 
- //num_on_tx is the number of on tx - for (num_on_tx=1; num_on_tx -#include - +#include "cacti_interface.h" +#include "Ucache.h" #include "area.h" #include "basic_circuit.h" #include "component.h" #include "const.h" #include "parameter.h" -#include "cacti_interface.h" -#include "Ucache.h" -#include -#include #include +#include +#include +#include +#include using namespace std; - -bool mem_array::lt(const mem_array * m1, const mem_array * m2) -{ - if (m1->Nspd < m2->Nspd) return true; - else if (m1->Nspd > m2->Nspd) return false; - else if (m1->Ndwl < m2->Ndwl) return true; - else if (m1->Ndwl > m2->Ndwl) return false; - else if (m1->Ndbl < m2->Ndbl) return true; - else if (m1->Ndbl > m2->Ndbl) return false; - else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; - else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; - else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; - else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; - else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; - else return false; +bool mem_array::lt(const mem_array *m1, const mem_array *m2) { + if (m1->Nspd < m2->Nspd) + return true; + else if (m1->Nspd > m2->Nspd) + return false; + else if (m1->Ndwl < m2->Ndwl) + return true; + else if (m1->Ndwl > m2->Ndwl) + return false; + else if (m1->Ndbl < m2->Ndbl) + return true; + else if (m1->Ndbl > m2->Ndbl) + return false; + else if (m1->deg_bl_muxing < m2->deg_bl_muxing) + return true; + else if (m1->deg_bl_muxing > m2->deg_bl_muxing) + return false; + else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) + return true; + else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) + return false; + else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) + return true; + else + return false; } - - -void uca_org_t::find_delay() -{ - mem_array * data_arr = data_array2; - mem_array * tag_arr = tag_array2; +void uca_org_t::find_delay() { + mem_array *data_arr = data_array2; + mem_array *tag_arr = tag_array2; // check whether it is a regular cache or scratch ram - 
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { access_time = data_arr->access_time; } // Both tag and data lookup happen in parallel // and the entire set is sent over the data array h-tree without // waiting for the way-select signal --TODO add the corresponding // power overhead Nav - else if (g_ip->fast_access == true) - { + else if (g_ip->fast_access == true) { access_time = MAX(tag_arr->access_time, data_arr->access_time); } // Tag is accessed first. On a hit, way-select signal along with the // address is sent to read/write the appropriate block in the data // array - else if (g_ip->is_seq_acc == true) - { + else if (g_ip->is_seq_acc == true) { access_time = tag_arr->access_time + data_arr->access_time; } // Normal access: tag array access and data array access happen in parallel. // But, the data array will wait for the way-select and transfer only the // appropriate block over the h-tree. - else - { - access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, - data_arr->delay_before_subarray_output_driver) + - data_arr->delay_from_subarray_output_driver_to_output; + else { + access_time = + MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, + data_arr->delay_before_subarray_output_driver) + + data_arr->delay_from_subarray_output_driver_to_output; } } - - -void uca_org_t::find_energy() -{ - if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache) +void uca_org_t::find_energy() { + if (!(g_ip->pure_ram || g_ip->pure_cam || + g_ip->fully_assoc)) //(g_ip->is_cache) power = data_array2->power + tag_array2->power; else power = data_array2->power; } - - -void uca_org_t::find_area() -{ - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false) +void uca_org_t::find_area() { + if (g_ip->pure_ram || g_ip->pure_cam || + g_ip->fully_assoc) //(g_ip->is_cache == false) { - cache_ht = data_array2->height; + 
cache_ht = data_array2->height; cache_len = data_array2->width; - } - else - { - cache_ht = MAX(tag_array2->height, data_array2->height); + } else { + cache_ht = MAX(tag_array2->height, data_array2->height); cache_len = tag_array2->width + data_array2->width; } area = cache_ht * cache_len; } -void uca_org_t::adjust_area() -{ +void uca_org_t::adjust_area() { double area_adjust; - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { - if (data_array2->area_efficiency/100.0<0.2) - { - //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); - area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0)); - cache_ht = cache_ht/area_adjust; - cache_len = cache_len/area_adjust; + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + if (data_array2->area_efficiency / 100.0 < 0.2) { + // area_adjust = + // sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); + area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0)); + cache_ht = cache_ht / area_adjust; + cache_len = cache_len / area_adjust; } } area = cache_ht * cache_len; } -void uca_org_t::find_cyc() -{ - if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false) +void uca_org_t::find_cyc() { + if ((g_ip->pure_ram || g_ip->pure_cam || + g_ip->fully_assoc)) //(g_ip->is_cache == false) { cycle_time = data_array2->cycle_time; - } - else - { - cycle_time = MAX(tag_array2->cycle_time, - data_array2->cycle_time); + } else { + cycle_time = MAX(tag_array2->cycle_time, data_array2->cycle_time); } } -uca_org_t :: uca_org_t() -:tag_array2(0), - data_array2(0), - uca_pg_reference(0) -{ - uca_q = vector(0); +uca_org_t ::uca_org_t() : tag_array2(0), data_array2(0), uca_pg_reference(0) { + uca_q = vector(0); } -void uca_org_t :: cleanup() -{ - // uca_org_t * it_uca_org; - if (data_array2!=0){ - delete data_array2; - data_array2 =0; - } - - if (tag_array2!=0){ - delete tag_array2; - tag_array2 =0; - } - - std::vector::size_type sz = uca_q.size(); - for (int 
i=sz-1; i>=0; i--) - { - if (uca_q[i]->data_array2!=0) - { - delete uca_q[i]->data_array2; - uca_q[i]->data_array2 =0; - } - if (uca_q[i]->tag_array2!=0){ - delete uca_q[i]->tag_array2; - uca_q[i]->tag_array2 =0; - } - delete uca_q[i]; - uca_q[i] =0; - uca_q.pop_back(); - } - - if (uca_pg_reference!=0) - { - if (uca_pg_reference->data_array2!=0) - { - delete uca_pg_reference->data_array2; - uca_pg_reference->data_array2 =0; - } - if (uca_pg_reference->tag_array2!=0){ - delete uca_pg_reference->tag_array2; - uca_pg_reference->tag_array2 =0; - } - delete uca_pg_reference; - uca_pg_reference =0; - } +void uca_org_t ::cleanup() { + // uca_org_t * it_uca_org; + if (data_array2 != 0) { + delete data_array2; + data_array2 = 0; + } + + if (tag_array2 != 0) { + delete tag_array2; + tag_array2 = 0; + } + + std::vector::size_type sz = uca_q.size(); + for (int i = sz - 1; i >= 0; i--) { + if (uca_q[i]->data_array2 != 0) { + delete uca_q[i]->data_array2; + uca_q[i]->data_array2 = 0; + } + if (uca_q[i]->tag_array2 != 0) { + delete uca_q[i]->tag_array2; + uca_q[i]->tag_array2 = 0; + } + delete uca_q[i]; + uca_q[i] = 0; + uca_q.pop_back(); + } + + if (uca_pg_reference != 0) { + if (uca_pg_reference->data_array2 != 0) { + delete uca_pg_reference->data_array2; + uca_pg_reference->data_array2 = 0; + } + if (uca_pg_reference->tag_array2 != 0) { + delete uca_pg_reference->tag_array2; + uca_pg_reference->tag_array2 = 0; + } + delete uca_pg_reference; + uca_pg_reference = 0; + } } -uca_org_t :: ~uca_org_t() -{ -// cleanup(); +uca_org_t ::~uca_org_t() { + // cleanup(); } diff --git a/cacti/cacti_interface.h b/cacti/cacti_interface.h index b2b8c4c..0a94e7f 100644 --- a/cacti/cacti_interface.h +++ b/cacti/cacti_interface.h @@ -29,189 +29,194 @@ * ***************************************************************************/ - - #ifndef __CACTI_INTERFACE_H__ #define __CACTI_INTERFACE_H__ +#include "const.h" + +#include +#include #include #include #include -#include -#include -#include 
"const.h" using namespace std; - class min_values_t; class mem_array; class uca_org_t; - -class powerComponents -{ - public: - double dynamic; - double leakage; - double gate_leakage; - double short_circuit; - double longer_channel_leakage; - double power_gated_leakage; - double power_gated_with_long_channel_leakage; - - powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), - longer_channel_leakage(0), power_gated_leakage(0), - power_gated_with_long_channel_leakage (0) { } - powerComponents(const powerComponents & obj) { *this = obj; } - powerComponents & operator=(const powerComponents & rhs) - { - dynamic = rhs.dynamic; - leakage = rhs.leakage; - gate_leakage = rhs.gate_leakage; - short_circuit = rhs.short_circuit; - longer_channel_leakage = rhs.longer_channel_leakage; - power_gated_leakage = rhs.power_gated_leakage; - power_gated_with_long_channel_leakage = rhs.power_gated_with_long_channel_leakage; - return *this; - } - void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0; - longer_channel_leakage = 0; power_gated_leakage = 0;power_gated_with_long_channel_leakage=0;} - - friend powerComponents operator+(const powerComponents & x, const powerComponents & y); - friend powerComponents operator*(const powerComponents & x, double const * const y); +class powerComponents { +public: + double dynamic; + double leakage; + double gate_leakage; + double short_circuit; + double longer_channel_leakage; + double power_gated_leakage; + double power_gated_with_long_channel_leakage; + + powerComponents() + : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), + longer_channel_leakage(0), power_gated_leakage(0), + power_gated_with_long_channel_leakage(0) {} + powerComponents(const powerComponents &obj) { *this = obj; } + powerComponents &operator=(const powerComponents &rhs) { + dynamic = rhs.dynamic; + leakage = rhs.leakage; + gate_leakage = rhs.gate_leakage; + short_circuit = rhs.short_circuit; + longer_channel_leakage = 
rhs.longer_channel_leakage; + power_gated_leakage = rhs.power_gated_leakage; + power_gated_with_long_channel_leakage = + rhs.power_gated_with_long_channel_leakage; + return *this; + } + void reset() { + dynamic = 0; + leakage = 0; + gate_leakage = 0; + short_circuit = 0; + longer_channel_leakage = 0; + power_gated_leakage = 0; + power_gated_with_long_channel_leakage = 0; + } + + friend powerComponents operator+(const powerComponents &x, + const powerComponents &y); + friend powerComponents operator*(const powerComponents &x, + double const *const y); }; - - -class powerDef -{ - public: - powerComponents readOp; - powerComponents writeOp; - powerComponents searchOp;//Sheng: for CAM and FA - - powerDef() : readOp(), writeOp(), searchOp() { } - void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();} - - friend powerDef operator+(const powerDef & x, const powerDef & y); - friend powerDef operator*(const powerDef & x, double const * const y); +class powerDef { +public: + powerComponents readOp; + powerComponents writeOp; + powerComponents searchOp; // Sheng: for CAM and FA + + powerDef() : readOp(), writeOp(), searchOp() {} + void reset() { + readOp.reset(); + writeOp.reset(); + searchOp.reset(); + } + + friend powerDef operator+(const powerDef &x, const powerDef &y); + friend powerDef operator*(const powerDef &x, double const *const y); }; -enum Wire_type -{ - Global /* gloabl wires with repeaters */, - Global_5 /* 5% delay penalty */, - Global_10 /* 10% delay penalty */, - Global_20 /* 20% delay penalty */, - Global_30 /* 30% delay penalty */, - Low_swing /* differential low power wires with high area overhead */, - Semi_global /* mid-level wires with repeaters*/, - Transmission /* tranmission lines with high area overhead */, - Optical /* optical wires */, - Invalid_wtype +enum Wire_type { + Global /* gloabl wires with repeaters */, + Global_5 /* 5% delay penalty */, + Global_10 /* 10% delay penalty */, + Global_20 /* 20% delay penalty */, + Global_30 /* 
30% delay penalty */, + Low_swing /* differential low power wires with high area overhead */, + Semi_global /* mid-level wires with repeaters*/, + Transmission /* tranmission lines with high area overhead */, + Optical /* optical wires */, + Invalid_wtype }; - - -class InputParameter -{ - public: - - InputParameter(); - void parse_cfg(const string & infile); - - bool error_checking(); // return false if the input parameters are problematic - void display_ip(); - - unsigned int cache_sz; // in bytes - unsigned int line_sz; - unsigned int assoc; - unsigned int nbanks; - unsigned int out_w;// == nr_bits_out - bool specific_tag; - unsigned int tag_w; - unsigned int access_mode; - unsigned int obj_func_dyn_energy; - unsigned int obj_func_dyn_power; - unsigned int obj_func_leak_power; - unsigned int obj_func_cycle_t; - - double F_sz_nm; // feature size in nm - double F_sz_um; // feature size in um - bool specific_hp_vdd; // whether to have user defined vdd that is different from ITRS - double hp_Vdd; // user specified vdd - bool specific_lstp_vdd; // whether to have user defined vdd that is different from ITRS - double lstp_Vdd; - bool specific_lop_vdd; // whether to have user defined vdd that is different from ITRS - double lop_Vdd; - bool specific_vcc_min; // whether to have user defined vcc_min for power-gating that is different from the value constrained by technology for maintaining states - double user_defined_vcc_min; - bool user_defined_vcc_underflow; //flag to indicate when user defined vcc is too low for the circuit to retain state - unsigned int num_rw_ports; - unsigned int num_rd_ports; - unsigned int num_wr_ports; - unsigned int num_se_rd_ports; // number of single ended read ports - unsigned int num_search_ports; // number of search ports for CAM - bool is_main_mem; - bool is_cache; - bool pure_ram; - bool pure_cam; - bool rpters_in_htree; // if there are repeaters in htree segment - unsigned int ver_htree_wires_over_array; - unsigned int 
broadcast_addr_din_over_ver_htrees; - unsigned int temp; - - unsigned int ram_cell_tech_type; - unsigned int peri_global_tech_type; - unsigned int data_arr_ram_cell_tech_type; - unsigned int data_arr_peri_global_tech_type; - unsigned int tag_arr_ram_cell_tech_type; - unsigned int tag_arr_peri_global_tech_type; - - unsigned int burst_len; - unsigned int int_prefetch_w; - unsigned int page_sz_bits; - - unsigned int ic_proj_type; // interconnect_projection_type - unsigned int wire_is_mat_type; // wire_inside_mat_type - unsigned int wire_os_mat_type; // wire_outside_mat_type - enum Wire_type wt; - int force_wiretype; - bool print_input_args; - unsigned int nuca_cache_sz; // TODO - int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm; - bool force_cache_config; - - int cache_level; - int cores; - int nuca_bank_count; - int force_nuca_bank; - - int delay_wt, dynamic_power_wt, leakage_power_wt, - cycle_time_wt, area_wt; - int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, - cycle_time_wt_nuca, area_wt_nuca; - - int delay_dev, dynamic_power_dev, leakage_power_dev, - cycle_time_dev, area_dev; - int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, - cycle_time_dev_nuca, area_dev_nuca; - int ed; //ED or ED2 optimization - int nuca; - - bool fast_access; - unsigned int block_sz; // bytes - unsigned int tag_assoc; - unsigned int data_assoc; - bool is_seq_acc; - bool fully_assoc; - unsigned int nsets; // == number_of_sets - int print_detail; - - - bool add_ecc_b_; - //parameters for design constraint +class InputParameter { +public: + InputParameter(); + void parse_cfg(const string &infile); + + bool error_checking(); // return false if the input parameters are problematic + void display_ip(); + + unsigned int cache_sz; // in bytes + unsigned int line_sz; + unsigned int assoc; + unsigned int nbanks; + unsigned int out_w; // == nr_bits_out + bool specific_tag; + unsigned int tag_w; + unsigned int access_mode; + unsigned int obj_func_dyn_energy; + unsigned int 
obj_func_dyn_power; + unsigned int obj_func_leak_power; + unsigned int obj_func_cycle_t; + + double F_sz_nm; // feature size in nm + double F_sz_um; // feature size in um + bool specific_hp_vdd; // whether to have user defined vdd that is different + // from ITRS + double hp_Vdd; // user specified vdd + bool specific_lstp_vdd; // whether to have user defined vdd that is different + // from ITRS + double lstp_Vdd; + bool specific_lop_vdd; // whether to have user defined vdd that is different + // from ITRS + double lop_Vdd; + bool specific_vcc_min; // whether to have user defined vcc_min for + // power-gating that is different from the value + // constrained by technology for maintaining states + double user_defined_vcc_min; + bool user_defined_vcc_underflow; // flag to indicate when user defined vcc is + // too low for the circuit to retain state + unsigned int num_rw_ports; + unsigned int num_rd_ports; + unsigned int num_wr_ports; + unsigned int num_se_rd_ports; // number of single ended read ports + unsigned int num_search_ports; // number of search ports for CAM + bool is_main_mem; + bool is_cache; + bool pure_ram; + bool pure_cam; + bool rpters_in_htree; // if there are repeaters in htree segment + unsigned int ver_htree_wires_over_array; + unsigned int broadcast_addr_din_over_ver_htrees; + unsigned int temp; + + unsigned int ram_cell_tech_type; + unsigned int peri_global_tech_type; + unsigned int data_arr_ram_cell_tech_type; + unsigned int data_arr_peri_global_tech_type; + unsigned int tag_arr_ram_cell_tech_type; + unsigned int tag_arr_peri_global_tech_type; + + unsigned int burst_len; + unsigned int int_prefetch_w; + unsigned int page_sz_bits; + + unsigned int ic_proj_type; // interconnect_projection_type + unsigned int wire_is_mat_type; // wire_inside_mat_type + unsigned int wire_os_mat_type; // wire_outside_mat_type + enum Wire_type wt; + int force_wiretype; + bool print_input_args; + unsigned int nuca_cache_sz; // TODO + int ndbl, ndwl, nspd, ndsam1, 
ndsam2, ndcm; + bool force_cache_config; + + int cache_level; + int cores; + int nuca_bank_count; + int force_nuca_bank; + + int delay_wt, dynamic_power_wt, leakage_power_wt, cycle_time_wt, area_wt; + int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, + cycle_time_wt_nuca, area_wt_nuca; + + int delay_dev, dynamic_power_dev, leakage_power_dev, cycle_time_dev, area_dev; + int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, + cycle_time_dev_nuca, area_dev_nuca; + int ed; // ED or ED2 optimization + int nuca; + + bool fast_access; + unsigned int block_sz; // bytes + unsigned int tag_assoc; + unsigned int data_assoc; + bool is_seq_acc; + bool fully_assoc; + unsigned int nsets; // == number_of_sets + int print_detail; + + bool add_ecc_b_; + // parameters for design constraint double throughput; double latency; bool pipelinable; @@ -235,8 +240,7 @@ class InputParameter bool long_channel_device; }; - -typedef struct{ +typedef struct { int Ndwl; int Ndbl; double Nspd; @@ -316,59 +320,59 @@ typedef struct{ double routing_area_height_within_bank; double routing_area_width_within_bank; double area_efficiency; -// double perc_power_dyn_routing_to_bank; -// double perc_power_dyn_addr_horizontal_htree; -// double perc_power_dyn_datain_horizontal_htree; -// double perc_power_dyn_dataout_horizontal_htree; -// double perc_power_dyn_addr_vertical_htree; -// double perc_power_dyn_datain_vertical_htree; -// double perc_power_dyn_row_predecoder_drivers; -// double perc_power_dyn_row_predecoder_blocks; -// double perc_power_dyn_row_decoders; -// double perc_power_dyn_bit_mux_predecoder_drivers; -// double perc_power_dyn_bit_mux_predecoder_blocks; -// double perc_power_dyn_bit_mux_decoders; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_1_decoders; -// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers; -// double 
perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_2_decoders; -// double perc_power_dyn_bitlines; -// double perc_power_dyn_sense_amps; -// double perc_power_dyn_prechg_eq_drivers; -// double perc_power_dyn_subarray_output_drivers; -// double perc_power_dyn_dataout_vertical_htree; -// double perc_power_dyn_comparators; -// double perc_power_dyn_crossbar; -// double perc_power_dyn_spent_outside_mats; -// double perc_power_leak_routing_to_bank; -// double perc_power_leak_addr_horizontal_htree; -// double perc_power_leak_datain_horizontal_htree; -// double perc_power_leak_dataout_horizontal_htree; -// double perc_power_leak_addr_vertical_htree; -// double perc_power_leak_datain_vertical_htree; -// double perc_power_leak_row_predecoder_drivers; -// double perc_power_leak_row_predecoder_blocks; -// double perc_power_leak_row_decoders; -// double perc_power_leak_bit_mux_predecoder_drivers; -// double perc_power_leak_bit_mux_predecoder_blocks; -// double perc_power_leak_bit_mux_decoders; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_1_decoders; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_2_decoders; -// double perc_power_leak_bitlines; -// double perc_power_leak_sense_amps; -// double perc_power_leak_prechg_eq_drivers; -// double perc_power_leak_subarray_output_drivers; -// double perc_power_leak_dataout_vertical_htree; -// double perc_power_leak_comparators; -// double perc_power_leak_crossbar; -// double perc_leak_mats; -// double perc_active_mats; + // double perc_power_dyn_routing_to_bank; + // double perc_power_dyn_addr_horizontal_htree; + // double perc_power_dyn_datain_horizontal_htree; + // double perc_power_dyn_dataout_horizontal_htree; + // double 
perc_power_dyn_addr_vertical_htree; + // double perc_power_dyn_datain_vertical_htree; + // double perc_power_dyn_row_predecoder_drivers; + // double perc_power_dyn_row_predecoder_blocks; + // double perc_power_dyn_row_decoders; + // double perc_power_dyn_bit_mux_predecoder_drivers; + // double perc_power_dyn_bit_mux_predecoder_blocks; + // double perc_power_dyn_bit_mux_decoders; + // double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers; + // double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks; + // double perc_power_dyn_senseamp_mux_lev_1_decoders; + // double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers; + // double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks; + // double perc_power_dyn_senseamp_mux_lev_2_decoders; + // double perc_power_dyn_bitlines; + // double perc_power_dyn_sense_amps; + // double perc_power_dyn_prechg_eq_drivers; + // double perc_power_dyn_subarray_output_drivers; + // double perc_power_dyn_dataout_vertical_htree; + // double perc_power_dyn_comparators; + // double perc_power_dyn_crossbar; + // double perc_power_dyn_spent_outside_mats; + // double perc_power_leak_routing_to_bank; + // double perc_power_leak_addr_horizontal_htree; + // double perc_power_leak_datain_horizontal_htree; + // double perc_power_leak_dataout_horizontal_htree; + // double perc_power_leak_addr_vertical_htree; + // double perc_power_leak_datain_vertical_htree; + // double perc_power_leak_row_predecoder_drivers; + // double perc_power_leak_row_predecoder_blocks; + // double perc_power_leak_row_decoders; + // double perc_power_leak_bit_mux_predecoder_drivers; + // double perc_power_leak_bit_mux_predecoder_blocks; + // double perc_power_leak_bit_mux_decoders; + // double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers; + // double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks; + // double perc_power_leak_senseamp_mux_lev_1_decoders; + // double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers; + // double 
perc_power_leak_senseamp_mux_lev_2_predecoder_blocks; + // double perc_power_leak_senseamp_mux_lev_2_decoders; + // double perc_power_leak_bitlines; + // double perc_power_leak_sense_amps; + // double perc_power_leak_prechg_eq_drivers; + // double perc_power_leak_subarray_output_drivers; + // double perc_power_leak_dataout_vertical_htree; + // double perc_power_leak_comparators; + // double perc_power_leak_crossbar; + // double perc_leak_mats; + // double perc_active_mats; double refresh_power; double dram_refresh_period; double dram_array_availability; @@ -383,98 +387,69 @@ typedef struct{ double precharge_energy; } results_mem_array; - -class uca_org_t -{ - public: - mem_array * tag_array2; - mem_array * data_array2; - double access_time; - double cycle_time; - double area; - double area_efficiency; - powerDef power; - double leak_power_with_sleep_transistors_in_mats; - double cache_ht; - double cache_len; - char file_n[100]; - double vdd_periph_global; - bool valid; - results_mem_array tag_array; - results_mem_array data_array; - std::vector uca_q;//for results share the same settings (g_ip and dyn_p) but with different tech settings such as DVFS - uca_org_t * uca_pg_reference;//for references results when power gating is enabled. 
- uca_org_t(); - void find_delay(); - void find_energy(); - void find_area(); - void find_cyc(); - void adjust_area();//for McPAT only to adjust routing overhead - void cleanup(); - ~uca_org_t(); +class uca_org_t { +public: + mem_array *tag_array2; + mem_array *data_array2; + double access_time; + double cycle_time; + double area; + double area_efficiency; + powerDef power; + double leak_power_with_sleep_transistors_in_mats; + double cache_ht; + double cache_len; + char file_n[100]; + double vdd_periph_global; + bool valid; + results_mem_array tag_array; + results_mem_array data_array; + std::vector + uca_q; // for results share the same settings (g_ip and dyn_p) but with + // different tech settings such as DVFS + uca_org_t + *uca_pg_reference; // for references results when power gating is enabled. + uca_org_t(); + void find_delay(); + void find_energy(); + void find_area(); + void find_cyc(); + void adjust_area(); // for McPAT only to adjust routing overhead + void cleanup(); + ~uca_org_t(); }; void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); -uca_org_t cacti_interface(const string & infile_name); -//McPAT's plain interface, please keep !!! -uca_org_t cacti_interface(InputParameter * const local_interface); -//McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter * const local_interface); -//McPAT's plain interface, please keep !!! +uca_org_t cacti_interface(const string &infile_name); +// McPAT's plain interface, please keep !!! +uca_org_t cacti_interface(InputParameter *const local_interface); +// McPAT's plain interface, please keep !!! +uca_org_t init_interface(InputParameter *const local_interface); +// McPAT's plain interface, please keep !!! 
uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports, - int excl_write_ports, - int single_ended_read_ports, - int search_ports, - int banks, - double tech_node, - int output_width, - int specific_tag, - int tag_width, - int access_mode, - int cache, - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, - int obj_func_leakage_power, - int obj_func_cycle_time, - int obj_func_area, - int dev_func_delay, - int dev_func_dynamic_power, - int dev_func_leakage_power, - int dev_func_area, - int dev_func_cycle_time, - int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate - int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing - int data_arr_ram_cell_tech_flavor_in, - int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, - int interconnect_projection_type_in, - int wire_inside_mat_type_in, - int wire_outside_mat_type_in, - int REPEATERS_IN_HTREE_SEGMENTS_in, - int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, - int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, - int PAGE_SIZE_BITS_in, - int BURST_LENGTH_in, - int INTERNAL_PREFETCH_WIDTH_in, - int force_wiretype, - int wiretype, - int force_config, - int ndwl, - int ndbl, - int nspd, - int ndcm, - int ndsam1, - int ndsam2, - int ecc); + int cache_size, int line_size, int associativity, int rw_ports, + int excl_read_ports, int excl_write_ports, int single_ended_read_ports, + int search_ports, int banks, double tech_node, int output_width, + int specific_tag, int tag_width, int access_mode, int cache, int main_mem, + int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power, + int obj_func_cycle_time, int obj_func_area, int dev_func_delay, + int dev_func_dynamic_power, int dev_func_leakage_power, int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - 
ED^2, 2 - use weight and deviate + int temp, + int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay + // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, int wire_inside_mat_type_in, + int wire_outside_mat_type_in, int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, int PAGE_SIZE_BITS_in, + int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in, int force_wiretype, + int wiretype, int force_config, int ndwl, int ndbl, int nspd, int ndcm, + int ndsam1, int ndsam2, int ecc); // int cache_size, // int line_size, // int associativity, @@ -518,73 +493,49 @@ uca_org_t cacti_interface( // int BURST_LENGTH_in, // int INTERNAL_PREFETCH_WIDTH_in); -//Naveen's interface +// Naveen's interface uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports, - int excl_write_ports, - int single_ended_read_ports, - int banks, - double tech_node, - int page_sz, - int burst_length, - int pre_width, - int output_width, - int specific_tag, - int tag_width, - int access_mode, //0 normal, 1 seq, 2 fast - int cache, //scratch ram or cache - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, - int obj_func_leakage_power, - int obj_func_area, - int obj_func_cycle_time, - int dev_func_delay, - int dev_func_dynamic_power, - int dev_func_leakage_power, - int dev_func_area, - int dev_func_cycle_time, + int cache_size, int line_size, int associativity, int rw_ports, + int excl_read_ports, int excl_write_ports, int single_ended_read_ports, + int banks, double tech_node, int page_sz, int burst_length, int pre_width, + int output_width, int specific_tag, int tag_width, + int access_mode, // 0 normal, 1 seq, 2 fast + int 
cache, // scratch ram or cache + int main_mem, int obj_func_delay, int obj_func_dynamic_power, + int obj_func_leakage_power, int obj_func_area, int obj_func_cycle_time, + int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power, + int dev_func_area, int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay + // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, int interconnect_projection_type_in, // 0 - aggressive, 1 - normal - int wire_inside_mat_type_in, - int wire_outside_mat_type_in, + int wire_inside_mat_type_in, int wire_outside_mat_type_in, int is_nuca, // 0 - UCA, 1 - NUCA int core_count, int cache_level, // 0 - L2, 1 - L3 - int nuca_bank_count, - int nuca_obj_func_delay, - int nuca_obj_func_dynamic_power, - int nuca_obj_func_leakage_power, - int nuca_obj_func_area, - int nuca_obj_func_cycle_time, - int nuca_dev_func_delay, - int nuca_dev_func_dynamic_power, - int nuca_dev_func_leakage_power, - int nuca_dev_func_area, + int nuca_bank_count, int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, int nuca_obj_func_leakage_power, + int nuca_obj_func_area, int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, int nuca_dev_func_area, int nuca_dev_func_cycle_time, - int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported + int REPEATERS_IN_HTREE_SEGMENTS_in, // TODO for now only wires with + // repeaters are supported int p_input); -class mem_array -{ - public: 
- int Ndcm; - int Ndwl; - int Ndbl; +class mem_array { +public: + int Ndcm; + int Ndwl; + int Ndbl; double Nspd; - int deg_bl_muxing; - int Ndsam_lev_1; - int Ndsam_lev_2; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; double access_time; double cycle_time; double multisubbank_interleave_cycle_time; @@ -602,20 +553,12 @@ class mem_array double subarray_length; double subarray_height; - double delay_route_to_bank, - delay_input_htree, - delay_row_predecode_driver_and_block, - delay_row_decoder, - delay_bitlines, - delay_sense_amp, - delay_subarray_output_driver, - delay_dout_htree, - delay_comparator, - delay_matchlines; - - double all_banks_height, - all_banks_width, - area_efficiency; + double delay_route_to_bank, delay_input_htree, + delay_row_predecode_driver_and_block, delay_row_decoder, delay_bitlines, + delay_sense_amp, delay_subarray_output_driver, delay_dout_htree, + delay_comparator, delay_matchlines; + + double all_banks_height, all_banks_width, area_efficiency; powerDef power_routing_to_bank; powerDef power_addr_input_htree; @@ -654,20 +597,22 @@ class mem_array // dram stats double activate_energy, read_energy, write_energy, precharge_energy, - refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, - leak_power_request_and_reply_networks; + refresh_power, leak_power_subbank_closed_page, + leak_power_subbank_open_page, leak_power_request_and_reply_networks; double precharge_delay; - //Power-gating stats - double array_leakage; - double wl_leakage; - double cl_leakage; + // Power-gating stats + double array_leakage; + double wl_leakage; + double cl_leakage; double sram_sleep_tx_width, wl_sleep_tx_width, cl_sleep_tx_width; double sram_sleep_tx_area, wl_sleep_tx_area, cl_sleep_tx_area; - double sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, cl_sleep_wakeup_latency, bl_floating_wakeup_latency; - double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, cl_sleep_wakeup_energy, bl_floating_wakeup_energy; + double 
sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, + cl_sleep_wakeup_latency, bl_floating_wakeup_latency; + double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, + cl_sleep_wakeup_energy, bl_floating_wakeup_energy; int num_active_mats; int num_submarray_mats; @@ -675,8 +620,7 @@ class mem_array double long_channel_leakage_reduction_periperal; double long_channel_leakage_reduction_memcell; - static bool lt(const mem_array * m1, const mem_array * m2); + static bool lt(const mem_array *m1, const mem_array *m2); }; - #endif diff --git a/cacti/component.cc b/cacti/component.cc index abe5cb9..7741ffd 100644 --- a/cacti/component.cc +++ b/cacti/component.cc @@ -29,88 +29,68 @@ * ***************************************************************************/ +#include "component.h" - +#include "bank.h" +#include "decoder.h" #include #include #include -#include "bank.h" -#include "component.h" -#include "decoder.h" - using namespace std; +Component::Component() : area(), power(), rt_power(), delay(0) {} +Component::~Component() {} -Component::Component() - :area(), power(), rt_power(),delay(0) -{ -} - - - -Component::~Component() -{ -} - - - -double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) -{ +double Component::compute_diffusion_width(int num_stacked_in, + int num_folded_tr) { double w_poly = g_ip->F_sz_um; - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain + double spacing_poly_to_poly = + g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; - if (num_folded_tr > 1) - { - total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + - (num_folded_tr - 1) * num_stacked_in * w_poly + - (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; + if 
(num_folded_tr > 1) { + total_diff_w += + (num_folded_tr - 2) * 2 * spacing_poly_to_poly + + (num_folded_tr - 1) * num_stacked_in * w_poly + + (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; } return total_diff_w; } - - -double Component::compute_gate_area( - int gate_type, - int num_inputs, - double w_pmos, - double w_nmos, - double h_gate) -{ - if (w_pmos <= 0.0 || w_nmos <= 0.0) - { +double Component::compute_gate_area(int gate_type, int num_inputs, + double w_pmos, double w_nmos, + double h_gate) { + if (w_pmos <= 0.0 || w_nmos <= 0.0) { return 0.0; } double w_folded_pmos, w_folded_nmos; - int num_folded_pmos, num_folded_nmos; + int num_folded_pmos, num_folded_nmos; double total_ndiff_w, total_pdiff_w; Area gate; - double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; + double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); - if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) - { + if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) { return 0.0; } - w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; - w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); + w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; + w_folded_nmos = + (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); assert(w_folded_pmos > 0); - num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); - num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); + num_folded_pmos = (int)(ceil(w_pmos / w_folded_pmos)); + num_folded_nmos = (int)(ceil(w_nmos / w_folded_nmos)); - switch (gate_type) - { + switch (gate_type) { case INV: total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); @@ -132,105 +112,91 @@ double Component::compute_gate_area( gate.w = MAX(total_ndiff_w, total_pdiff_w); - if (w_folded_nmos > w_nmos) - { - //means that the height of the gate can - //be made smaller than 
the input height specified, so calculate the height of the gate. - gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; - } - else - { + if (w_folded_nmos > w_nmos) { + // means that the height of the gate can + // be made smaller than the input height specified, so calculate the height + // of the gate. + gate.h = + w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; + } else { gate.h = h_gate; } return gate.get_area(); } - - double Component::compute_tr_width_after_folding( double input_width, - double threshold_folding_width) -{//This is actually the width of the cell not the width of a device. -//The width of a cell and the width of a device is orthogonal. - if (input_width <= 0) - { + double threshold_folding_width) { // This is actually the width of the cell + // not the width of a device. + // The width of a cell and the width of a device is orthogonal. + if (input_width <= 0) { return 0; } - int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double width_poly = g_ip->F_sz_um; - double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; + int num_folded_tr = (int)(ceil(input_width / threshold_folding_width)); + double spacing_poly_to_poly = + g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double width_poly = g_ip->F_sz_um; + double total_diff_width = + num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; return total_diff_width; } - - -double Component::height_sense_amplifier(double pitch_sense_amp) -{ +double Component::height_sense_amplifier(double pitch_sense_amp) { // compute the height occupied by all PMOS transistors - double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + double 
h_pmos_tr = + compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; // compute the height occupied by all NMOS transistors - double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + double h_nmos_tr = + compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; // compute total height by considering gap between the p and n diffusion areas return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; } - - -int Component::logical_effort( - int num_gates_min, - double g, - double F, - double * w_n, - double * w_p, - double C_load, - double p_to_n_sz_ratio, - bool is_dram_, - bool is_wl_tr_, - double max_w_nmos) -{ - int num_gates = (int) (log(F) / log(fopt)); +int Component::logical_effort(int num_gates_min, double g, double F, + double *w_n, double *w_p, double C_load, + double p_to_n_sz_ratio, bool is_dram_, + bool is_wl_tr_, double max_w_nmos) { + int num_gates = (int)(log(F) / log(fopt)); // check if num_gates is odd. if so, add 1 to make it even - num_gates+= (num_gates % 2) ? 1 : 0; + num_gates += (num_gates % 2) ? 
1 : 0; num_gates = MAX(num_gates, num_gates_min); // recalculate the effective fanout of each stage double f = pow(F, 1.0 / num_gates); - int i = num_gates - 1; + int i = num_gates - 1; double C_in = C_load / f; - w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); - w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); - w_p[i] = p_to_n_sz_ratio * w_n[i]; - - if (w_n[i] > max_w_nmos) - { - double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); + w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / + gate_C(1, 0, is_dram_, false, is_wl_tr_); + w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; + + if (w_n[i] > max_w_nmos) { + double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, + is_wl_tr_); F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); - num_gates = (int) (log(F) / log(fopt)) + 1; - num_gates+= (num_gates % 2) ? 1 : 0; + num_gates = (int)(log(F) / log(fopt)) + 1; + num_gates += (num_gates % 2) ? 
1 : 0; num_gates = MAX(num_gates, num_gates_min); f = pow(F, 1.0 / (num_gates - 1)); i = num_gates - 1; - w_n[i] = max_w_nmos; - w_p[i] = p_to_n_sz_ratio * w_n[i]; + w_n[i] = max_w_nmos; + w_p[i] = p_to_n_sz_ratio * w_n[i]; } - for (i = num_gates - 2; i >= 1; i--) - { - w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); + for (i = num_gates - 2; i >= 1; i--) { + w_n[i] = MAX(w_n[i + 1] / f, g_tp.min_w_nmos_); w_p[i] = p_to_n_sz_ratio * w_n[i]; } assert(num_gates <= MAX_NUMBER_GATES_STAGE); return num_gates; } - diff --git a/cacti/component.h b/cacti/component.h index 74a089d..44af1bd 100644 --- a/cacti/component.h +++ b/cacti/component.h @@ -29,56 +29,41 @@ * ***************************************************************************/ - - #ifndef __COMPONENT_H__ #define __COMPONENT_H__ -#include "parameter.h" #include "area.h" +#include "parameter.h" using namespace std; class Crossbar; class Bank; -class Component -{ - public: - Component(); - ~Component(); +class Component { +public: + Component(); + ~Component(); - Area area; - powerDef power,rt_power; - double delay; - double cycle_time; + Area area; + powerDef power, rt_power; + double delay; + double cycle_time; - double compute_gate_area( - int gate_type, - int num_inputs, - double w_pmos, - double w_nmos, - double h_gate); + double compute_gate_area(int gate_type, int num_inputs, double w_pmos, + double w_nmos, double h_gate); - double compute_tr_width_after_folding(double input_width, double threshold_folding_width); - double height_sense_amplifier(double pitch_sense_amp); + double compute_tr_width_after_folding(double input_width, + double threshold_folding_width); + double height_sense_amplifier(double pitch_sense_amp); - protected: - int logical_effort( - int num_gates_min, - double g, - double F, - double * w_n, - double * w_p, - double C_load, - double p_to_n_sz_ratio, - bool is_dram_, - bool is_wl_tr_, - double max_w_nmos); +protected: + int logical_effort(int num_gates_min, double g, double F, double 
*w_n, + double *w_p, double C_load, double p_to_n_sz_ratio, + bool is_dram_, bool is_wl_tr_, double max_w_nmos); - private: - double compute_diffusion_width(int num_stacked_in, int num_folded_tr); +private: + double compute_diffusion_width(int num_stacked_in, int num_folded_tr); }; #endif - diff --git a/cacti/const.h b/cacti/const.h index 1a2b197..18a1257 100644 --- a/cacti/const.h +++ b/cacti/const.h @@ -32,11 +32,11 @@ #ifndef __CONST_H__ #define __CONST_H__ +#include #include +#include #include #include -#include -#include /* The following are things you might want to change * when compiling @@ -54,47 +54,45 @@ and 40 bits on the Opteron */ const int ADDRESS_BITS = 42; -/*dt: In addition to the tag bits, the tags also include 1 valid bit, 1 dirty bit, 2 bits for a 4-state - cache coherency protocoll (MESI), 1 bit for MRU (change this to log(ways) for full LRU). +/*dt: In addition to the tag bits, the tags also include 1 valid bit, 1 dirty + bit, 2 bits for a 4-state cache coherency protocoll (MESI), 1 bit for MRU + (change this to log(ways) for full LRU). 
So in total we have 1 + 1 + 2 + 1 = 5 */ const int EXTRA_TAG_BITS = 5; /* limits on the various N parameters */ -const unsigned int MAXDATAN = 512; // maximum for Ndwl and Ndbl -const unsigned int MAXSUBARRAYS = 1048576; // maximum subarrays for data and tag arrays -const unsigned int MAXDATASPD = 256; // maximum for Nspd -const unsigned int MAX_COL_MUX = 256; - - +const unsigned int MAXDATAN = 512; // maximum for Ndwl and Ndbl +const unsigned int MAXSUBARRAYS = + 1048576; // maximum subarrays for data and tag arrays +const unsigned int MAXDATASPD = 256; // maximum for Nspd +const unsigned int MAX_COL_MUX = 256; #define ROUTER_TYPES 3 #define WIRE_TYPES 6 const double Cpolywire = 0; - /* Threshold voltages (as a proportion of Vdd) If you don't know them, set all values to 0.5 */ -#define VTHFA1 0.452 -#define VTHFA2 0.304 -#define VTHFA3 0.420 -#define VTHFA4 0.413 -#define VTHFA5 0.405 -#define VTHFA6 0.452 -#define VSINV 0.452 -#define VTHCOMPINV 0.437 -#define VTHMUXNAND 0.548 // TODO : this constant must be revisited -#define VTHEVALINV 0.452 +#define VTHFA1 0.452 +#define VTHFA2 0.304 +#define VTHFA3 0.420 +#define VTHFA4 0.413 +#define VTHFA5 0.405 +#define VTHFA6 0.452 +#define VSINV 0.452 +#define VTHCOMPINV 0.437 +#define VTHMUXNAND 0.548 // TODO : this constant must be revisited +#define VTHEVALINV 0.452 #define VTHSENSEEXTDRV 0.438 - -//WmuxdrvNANDn and WmuxdrvNANDp are no longer being used but it's part of the old -//delay_comparator function which we are using exactly as it used to be, so just setting these to 0 +// WmuxdrvNANDn and WmuxdrvNANDp are no longer being used but it's part of the +// old delay_comparator function which we are using exactly as it used to be, so +// just setting these to 0 const double WmuxdrvNANDn = 0; const double WmuxdrvNANDp = 0; - /*===================================================================*/ /* * The following are things you probably wouldn't want to change. 
@@ -102,62 +100,63 @@ const double WmuxdrvNANDp = 0; #define BIGNUM 1e30 #define INF 9999999 -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* Used to communicate with the horowitz model */ #define RISE 1 #define FALL 0 -#define NCH 1 -#define PCH 0 - - -#define EPSILON 0.5 //v4.1: This constant is being used in order to fix floating point -> integer -//conversion problems that were occuring within CACTI. Typical problem that was occuring was -//that with different compilers a floating point number like 3.0 would get represented as either -//2.9999....or 3.00000001 and then the integer part of the floating point number (3.0) would -//be computed differently depending on the compiler. What we are doing now is to replace -//int (x) with (int) (x+EPSILON) where EPSILON is 0.5. This would fix such problems. Note that -//this works only when x is an integer >= 0. +#define NCH 1 +#define PCH 0 + +#define EPSILON \ + 0.5 // v4.1: This constant is being used in order to fix floating point -> + // integer +// conversion problems that were occuring within CACTI. Typical problem that was +// occuring was that with different compilers a floating point number like 3.0 +// would get represented as either 2.9999....or 3.00000001 and then the integer +// part of the floating point number (3.0) would be computed differently +// depending on the compiler. What we are doing now is to replace int (x) with +// (int) (x+EPSILON) where EPSILON is 0.5. This would fix such problems. Note +// that this works only when x is an integer >= 0. /* * Sheng thinks this is more a solution to solve the simple truncate problem - * (http://www.cs.tut.fi/~jkorpela/round.html) rather than the problem mentioned above. - * Unfortunately, this solution causes nasty bugs (different results when using O0 and O3). 
- * Moreover, round is not correct in CACTI since when an extra fraction of bit/line is needed, - * we need to provide a complete bit/line even the fraction is just 0.01. - * So, in later version than 6.5 we use (int)ceil() to get double to int conversion. + * (http://www.cs.tut.fi/~jkorpela/round.html) rather than the problem mentioned + * above. Unfortunately, this solution causes nasty bugs (different results when + * using O0 and O3). Moreover, round is not correct in CACTI since when an extra + * fraction of bit/line is needed, we need to provide a complete bit/line even + * the fraction is just 0.01. So, in later version than 6.5 we use (int)ceil() + * to get double to int conversion. */ #define EPSILON2 0.1 #define EPSILON3 0.6 - -#define MINSUBARRAYROWS 16 //For simplicity in modeling, for the row decoding structure, we assume -//that each row predecode block is composed of at least one 2-4 decoder. When the outputs from the -//row predecode blocks are combined this means that there are at least 4*4=16 row decode outputs -#define MAXSUBARRAYROWS 262144 //Each row predecode block produces a max of 2^9 outputs. So -//the maximum number of row decode outputs will be 2^9*2^9 +#define MINSUBARRAYROWS \ + 16 // For simplicity in modeling, for the row decoding structure, we assume +// that each row predecode block is composed of at least one 2-4 decoder. When +// the outputs from the row predecode blocks are combined this means that there +// are at least 4*4=16 row decode outputs +#define MAXSUBARRAYROWS \ + 262144 // Each row predecode block produces a max of 2^9 outputs. 
So +// the maximum number of row decode outputs will be 2^9*2^9 #define MINSUBARRAYCOLS 2 #define MAXSUBARRAYCOLS 262144 - #define INV 0 #define NOR 1 #define NAND 2 - #define NUMBER_TECH_FLAVORS 4 -#define NUMBER_INTERCONNECT_PROJECTION_TYPES 2 //aggressive and conservative -//0 = Aggressive projections, 1 = Conservative projections -#define NUMBER_WIRE_TYPES 4 //local, semi-global and global -//1 = 'Semi-global' wire type, 2 = 'Global' wire type - +#define NUMBER_INTERCONNECT_PROJECTION_TYPES 2 // aggressive and conservative +// 0 = Aggressive projections, 1 = Conservative projections +#define NUMBER_WIRE_TYPES 4 // local, semi-global and global +// 1 = 'Semi-global' wire type, 2 = 'Global' wire type const int dram_cell_tech_flavor = 3; - -#define VBITSENSEMIN 0.08 //minimum bitline sense voltage is fixed to be 80 mV. +#define VBITSENSEMIN 0.08 // minimum bitline sense voltage is fixed to be 80 mV. #define fopt 4.0 @@ -169,7 +168,7 @@ const int dram_cell_tech_flavor = 3; // this variable can be set to carry out solution optimization for // a maximum area allocation. -#define STACKED_DIE_LAYER_ALLOTED_AREA_mm2 0 //6.24 //6.21//71.5 +#define STACKED_DIE_LAYER_ALLOTED_AREA_mm2 0 // 6.24 //6.21//71.5 // this variable can also be employed when solution optimization // with maximum area allocation is carried out. 
@@ -195,43 +194,41 @@ const int dram_cell_tech_flavor = 3; // this can be used to model the length of interconnect // between a bank and a crossbar -#define LENGTH_INTERCONNECT_FROM_BANK_TO_CROSSBAR 0 //3791 // 2880//micron +#define LENGTH_INTERCONNECT_FROM_BANK_TO_CROSSBAR 0 // 3791 // 2880//micron #define IS_CROSSBAR 0 #define NUMBER_INPUT_PORTS_CROSSBAR 8 #define NUMBER_OUTPUT_PORTS_CROSSBAR 8 #define NUMBER_SIGNALS_PER_PORT_CROSSBAR 256 - #define MAT_LEAKAGE_REDUCTION_DUE_TO_SLEEP_TRANSISTORS_FACTOR 1 #define LEAKAGE_REDUCTION_DUE_TO_LONG_CHANNEL_HP_TRANSISTORS_FACTOR 1 #define PAGE_MODE 0 #define MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA 60 -// We are actually not using this variable in the CACTI code. We just want to acknowledge that -// this current should be multiplied by the DDR(n) system VDD value to compute the standby power -// consumed during precharge. - +// We are actually not using this variable in the CACTI code. We just want to +// acknowledge that this current should be multiplied by the DDR(n) system VDD +// value to compute the standby power consumed during precharge. 
const double VDD_STORAGE_LOSS_FRACTION_WORST = 0.125; -const double CU_RESISTIVITY = 0.022; //ohm-micron -const double BULK_CU_RESISTIVITY = 0.018; //ohm-micron -const double PERMITTIVITY_FREE_SPACE = 8.854e-18; //F/micron +const double CU_RESISTIVITY = 0.022; // ohm-micron +const double BULK_CU_RESISTIVITY = 0.018; // ohm-micron +const double PERMITTIVITY_FREE_SPACE = 8.854e-18; // F/micron const static uint32_t sram_num_cells_wl_stitching_ = 16; const static uint32_t dram_num_cells_wl_stitching_ = 64; const static uint32_t comm_dram_num_cells_wl_stitching_ = 256; -const static double num_bits_per_ecc_b_ = 8.0; +const static double num_bits_per_ecc_b_ = 8.0; -const double bit_to_byte = 8.0; +const double bit_to_byte = 8.0; #define MAX_NUMBER_GATES_STAGE 20 #define MAX_NUMBER_HTREE_NODES 20 #define NAND2_LEAK_STACK_FACTOR 0.2 #define NAND3_LEAK_STACK_FACTOR 0.2 #define NOR2_LEAK_STACK_FACTOR 0.2 -#define INV_LEAK_STACK_FACTOR 0.5 +#define INV_LEAK_STACK_FACTOR 0.5 #define MAX_NUMBER_ARRAY_PARTITIONS 1000000 // abbreviations used in this project @@ -248,25 +245,21 @@ const double bit_to_byte = 8.0; // h : height or horizontal // v : vertical or velocity - -enum ram_cell_tech_type_num -{ - itrs_hp = 0, +enum ram_cell_tech_type_num { + itrs_hp = 0, itrs_lstp = 1, - itrs_lop = 2, - lp_dram = 3, + itrs_lop = 2, + lp_dram = 3, comm_dram = 4 }; -const double pppm[4] = {1,1,1,1}; -const double pppm_lkg[4] = {0,1,1,0}; -const double pppm_dyn[4] = {1,0,0,0}; -const double pppm_Isub[4] = {0,1,0,0}; -const double pppm_Ig[4] = {0,0,1,0}; -const double pppm_sc[4] = {0,0,0,1}; - -const double Ilinear_to_Isat_ratio =2.0; - +const double pppm[4] = {1, 1, 1, 1}; +const double pppm_lkg[4] = {0, 1, 1, 0}; +const double pppm_dyn[4] = {1, 0, 0, 0}; +const double pppm_Isub[4] = {0, 1, 0, 0}; +const double pppm_Ig[4] = {0, 0, 1, 0}; +const double pppm_sc[4] = {0, 0, 0, 1}; +const double Ilinear_to_Isat_ratio = 2.0; #endif diff --git a/cacti/crossbar.cc b/cacti/crossbar.cc index 
d7386a8..d6cf098 100644 --- a/cacti/crossbar.cc +++ b/cacti/crossbar.cc @@ -34,53 +34,53 @@ #define ASPECT_THRESHOLD .8 #define ADJ 1 -Crossbar::Crossbar( - double n_inp_, - double n_out_, - double flit_size_, - TechnologyParameter::DeviceType *dt - ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; +Crossbar::Crossbar(double n_inp_, double n_out_, double flit_size_, + TechnologyParameter::DeviceType *dt) + : n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; Vdd = dt->Vdd; CB_ADJ = 1; } -Crossbar::~Crossbar(){} +Crossbar::~Crossbar() {} -double Crossbar::output_buffer() -{ +double Crossbar::output_buffer() { - //Wire winit(4, 4); - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; + // Wire winit(4, 4); + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; Wire w1(g_ip->wt, l_eff); - //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; - double s1 = w1.repeater_size * (l_eff n_to_p_eff_curr_drv_ratio; - // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor - TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - TriS2 = s1; //driver transistor + // the model assumes input capacitance of the wire driver = input capacitance + // of nand + nor = input cap of the driver transistor + TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + TriS2 = s1; // driver transistor if (TriS1 < 1) TriS1 = 1; - double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + - gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0); -// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + -// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + -// gate_C(TriS2*g_tp.min_w_nmos_, 0)+ -// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 
+ -// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + -// gate_C(TriS2*min_w_pmos, 0); - tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(TriS2*g_tp.min_w_nmos_, 0)+ - drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(TriS2*min_w_pmos, 0); - double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); - double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0); + double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) + + gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0); + // input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + // + + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + + // gate_C(TriS2*g_tp.min_w_nmos_, 0)+ + // drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + // gate_C(TriS2*min_w_pmos, 0); + tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(TriS2 * g_tp.min_w_nmos_, 0) + + drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(TriS2 * min_w_pmos, 0); + double output_cap = + drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); + double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0); tri_inp_cap = input_cap; tri_out_cap = output_cap; @@ -88,74 +88,97 @@ double Crossbar::output_buffer() return input_cap + output_cap + ctr_cap; } -void Crossbar::compute_power() -{ +void Crossbar::compute_power() { Wire winit(4, 4); double tri_cap = output_buffer(); assert(tri_cap > 0); - //area of a tristate 
logic - double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); + // area of a tristate logic + double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, + TriS2 * min_w_pmos, g_tp.cell_h_def); g_area *= 2; // to model area of output transistors - g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); - g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); - double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); + g_area += compute_gate_area(NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_, + TriS1 * min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area(NOR, 2, TriS1 * g_tp.min_w_nmos_, + TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); + double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); // effective no. of tristate buffers that need to be laid side by side - int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); - double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); + int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); + double wire_len = MAX(width * ntri * n_out, + flit_size * g_tp.wire_outside_mat.pitch * n_out); Wire w1(g_ip->wt, wire_len); area.w = wire_len; - area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; + area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ; Wire w2(g_ip->wt, area.h); - double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); - if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb; + double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp); + if (aspect_ratio_cb > 1) + aspect_ratio_cb = 1 / aspect_ratio_cb; if (aspect_ratio_cb < ASPECT_THRESHOLD) { if (n_out > 2 && n_inp > 2) { - CB_ADJ+=0.2; - //cout << "CB ADJ " << CB_ADJ << endl; + CB_ADJ += 0.2; + // cout << "CB ADJ " << CB_ADJ << endl; if (CB_ADJ < 4) { this->compute_power(); } } } - - - power.readOp.dynamic = 
(w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; - power.readOp.leakage = n_inp * n_out * flit_size * ( - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.leakage + w2.power.readOp.leakage); - power.readOp.gate_leakage = n_inp * n_out * flit_size * ( - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); + power.readOp.dynamic = + (w1.power.readOp.dynamic + w2.power.readOp.dynamic + + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * + Vdd * Vdd) * + flit_size; + power.readOp.leakage = n_inp * n_out * flit_size * + (cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, + min_w_pmos * TriS2 * 2, 1, inv) * + Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, + min_w_pmos * TriS1 * 3, 2, nand) * + Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, + min_w_pmos * TriS1 * 3, 2, nor) * + Vdd + + w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.gate_leakage = + n_inp * n_out * flit_size * + (cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 1, + inv) * + Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, + nand) * + Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, + nor) * + Vdd + + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); // delay calculation - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; Wire 
wdriver(g_ip->wt, l_eff); - double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); - double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; - delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); + double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) + + tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); + double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + + n_out * tri_inp_cap + n_inp * tri_out_cap; + delay = horowitz(w1.signal_rise_time(), res * cap, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); Wire wreset(); } -void Crossbar::print_crossbar() -{ +void Crossbar::print_crossbar() { cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; cout << "Flit size : " << flit_size << " bits" << endl; cout << "Width : " << area.w << " u" << endl; cout << "Height : " << area.h << " u" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; - cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; - cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; + cout << "Dynamic Power : " + << power.readOp.dynamic * 1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage * 1e3 << " (mW)" + << endl; + cout << "Gate Leakage Power : " << power.readOp.gate_leakage * 1e3 + << " (mW)" << endl; + cout << "Crossbar Delay : " << delay * 1e12 << " ps\n"; } - - diff --git a/cacti/crossbar.h b/cacti/crossbar.h index 47339c0..5bf9e7a 100644 --- a/cacti/crossbar.h +++ b/cacti/crossbar.h @@ -29,55 +29,49 @@ * ***************************************************************************/ - #ifndef __CROSSBAR__ #define __CROSSBAR__ -#include 
-#include #include "basic_circuit.h" #include "cacti_interface.h" #include "component.h" -#include "parameter.h" #include "mat.h" +#include "parameter.h" #include "wire.h" -class Crossbar : public Component -{ - public: - Crossbar( - double in, - double out, - double flit_sz, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); - ~Crossbar(); +#include +#include - void print_crossbar(); - double output_buffer(); - void compute_power(); +class Crossbar : public Component { +public: + Crossbar(double in, double out, double flit_sz, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); + ~Crossbar(); - double n_inp, n_out; - double flit_size; - double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; + void print_crossbar(); + double output_buffer(); + void compute_power(); - private: - double CB_ADJ; - /* - * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar - * buffer is adjusted to get an aspect ratio of whole cross bar close to one; - * when adjust the ratio, the number of wires route over the tri-state buffers does not change, - * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase - * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch - * will increase. As a result, the height of the crossbar (area.h) will increase. - */ + double n_inp, n_out; + double flit_size; + double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; - TechnologyParameter::DeviceType *deviceType; - double TriS1, TriS2; - double min_w_pmos, Vdd; +private: + double CB_ADJ; + /* + * Adjust factor of the height of the cross-point (tri-state buffer) cell + * (layout) in crossbar buffer is adjusted to get an aspect ratio of whole + * cross bar close to one; when adjust the ratio, the number of wires route + * over the tri-state buffers does not change, however, the effective wiring + * pitch changes. 
Specifically, since CB_ADJ will increase during the adjust, + * the tri-state buffer will become taller and thiner, and the effective + * wiring pitch will increase. As a result, the height of the crossbar + * (area.h) will increase. + */ + TechnologyParameter::DeviceType *deviceType; + double TriS1, TriS2; + double min_w_pmos, Vdd; }; - - - #endif diff --git a/cacti/decoder.cc b/cacti/decoder.cc index 1f18629..ace6156 100644 --- a/cacti/decoder.cc +++ b/cacti/decoder.cc @@ -29,47 +29,29 @@ * ***************************************************************************/ - +#include "decoder.h" #include "area.h" -#include "decoder.h" #include "parameter.h" + +#include #include #include -#include using namespace std; - -Decoder::Decoder( - int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area & cell_, - bool power_gating_, - int nodes_DSTN_) -:exist(false), - C_ld_dec_out(_C_ld_dec_out), - R_wire_dec_out(_R_wire_dec_out), - num_gates(0), num_gates_min(2), - delay(0), - //power(), - fully_assoc(fully_assoc_), is_dram(is_dram_), - is_wl_tr(is_wl_tr_), - total_driver_nwidth(0), - total_driver_pwidth(0), - cell(cell_), - power_gating(power_gating_), - nodes_DSTN(nodes_DSTN_), - sleeptx(NULL) -{ - - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { +Decoder::Decoder(int _num_dec_signals, bool flag_way_select, + double _C_ld_dec_out, double _R_wire_dec_out, + bool fully_assoc_, bool is_dram_, bool is_wl_tr_, + const Area &cell_, bool power_gating_, int nodes_DSTN_) + : exist(false), C_ld_dec_out(_C_ld_dec_out), + R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), + // power(), + fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), + total_driver_nwidth(0), total_driver_pwidth(0), cell(cell_), + power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { 
w_dec_n[i] = 0; w_dec_p[i] = 0; } @@ -81,62 +63,45 @@ Decoder::Decoder( */ int num_addr_bits_dec = _log2(_num_dec_signals); - if (num_addr_bits_dec < 4) - { - if (flag_way_select) - { + if (num_addr_bits_dec < 4) { + if (flag_way_select) { exist = true; num_in_signals = 2; - } - else - { + } else { num_in_signals = 0; } - } - else - { + } else { exist = true; - if (flag_way_select) - { + if (flag_way_select) { num_in_signals = 3; - } - else - { + } else { num_in_signals = 2; } } - assert(cell.h>0); - assert(cell.w>0); + assert(cell.h > 0); + assert(cell.w > 0); // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; - //area.h = 4 * cell.h; + // area.h = 4 * cell.h; area.h = g_tp.h_dec * cell.h; compute_widths(); compute_area(); - } - - -void Decoder::compute_widths() -{ +void Decoder::compute_widths() { double F; double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - if (exist) - { - if (num_in_signals == 2 || fully_assoc) - { + if (exist) { + if (num_in_signals == 2 || fully_assoc) { w_dec_n[0] = 2 * g_tp.min_w_nmos_; w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand2; - } - else - { + } else { w_dec_n[0] = 3 * g_tp.min_w_nmos_; w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand3; @@ -144,184 +109,172 @@ void Decoder::compute_widths() F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); - num_gates = logical_effort( - num_gates_min, - num_in_signals == 2 ? gnand2 : gnand3, - F, - w_dec_n, - w_dec_p, - C_ld_dec_out, - p_to_n_sz_ratio, - is_dram, - is_wl_tr, - g_tp.max_w_nmos_dec); - + num_gates = + logical_effort(num_gates_min, num_in_signals == 2 ? 
gnand2 : gnand3, F, + w_dec_n, w_dec_p, C_ld_dec_out, p_to_n_sz_ratio, is_dram, + is_wl_tr, g_tp.max_w_nmos_dec); } } - - -void Decoder::compute_area() -{ +void Decoder::compute_area() { double cumulative_area = 0; - double cumulative_curr = 0; // cumulative leakage current - double cumulative_curr_Ig = 0; // cumulative leakage current - - if (exist) - { // First check if this decoder exists - if (num_in_signals == 2) - { - cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - } - else if (num_in_signals == 3) - { - cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative leakage current + + if (exist) { // First check if this decoder exists + if (num_in_signals == 2) { + cumulative_area = + compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + } else if (num_in_signals == 3) { + cumulative_area = + compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + ; + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); } - for (int i = 1; i < num_gates; i++) - { - cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); - cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + for (int i = 1; 
i < num_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); + cumulative_curr += + cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); } power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage = cumulative_curr * g_tp.peri_global.Vcc_min; + power.readOp.power_gated_leakage = + cumulative_curr * g_tp.peri_global.Vcc_min; power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; area.w = (cumulative_area / area.h); - if (power_gating) - { - compute_power_gating(); - cumulative_area += sleeptx->area.get_area(); - area.w = (cumulative_area / area.h); - } + if (power_gating) { + compute_power_gating(); + cumulative_area += sleeptx->area.get_area(); + area.w = (cumulative_area / area.h); + } } } -void Decoder::compute_power_gating() -{ - //For all driver chains there is only one sleep transistors to save area - //Total transistor width for sleep tx calculation - for (int i = 0; i power_gating) - sleeptx = new Sleep_tx (g_ip->perfloss, - Isat_subarray, - is_footer, - c_wakeup, - detalV, - nodes_DSTN, - area); + // compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, cell.h); // Psleep tx + detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; + // if (g_ip->power_gating) + sleeptx = new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, + detalV, nodes_DSTN, area); } -double Decoder::compute_delays(double inrisetime) -{ - if (exist) - { - double ret_val = 0; // outrisetime - int i; +double Decoder::compute_delays(double inrisetime) { + if (exist) { + double ret_val = 0; // outrisetime + int i; double rd, tf, this_delay, c_load, c_intrinsic, Vpp; double Vdd = g_tp.peri_global.Vdd; - if ((is_wl_tr) && (is_dram)) - { + if 
((is_wl_tr) && (is_dram)) { Vpp = g_tp.vpp; - } - else if (is_wl_tr) - { + } else if (is_wl_tr) { Vpp = g_tp.sram_cell.Vdd; - } - else - { + } else { Vpp = g_tp.peri_global.Vdd; } // first check whether a decoder is required at all rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + - drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); + c_intrinsic = + drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * + num_in_signals + + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, + is_wl_tr); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; -// cout<<"w_dec_n["<<0<<"] = "<blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) -{ +Predec::Predec(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) + : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + - blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + - blk2->power_L2.readOp.leakage; - - driver_power.readOp.power_gated_leakage = drv1->power_nand2_path.readOp.power_gated_leakage + - drv1->power_nand3_path.readOp.power_gated_leakage + - drv2->power_nand2_path.readOp.power_gated_leakage + - drv2->power_nand3_path.readOp.power_gated_leakage; - block_power.readOp.power_gated_leakage = blk1->power_nand2_path.readOp.power_gated_leakage + - 
blk1->power_nand3_path.readOp.power_gated_leakage + - blk1->power_L2.readOp.power_gated_leakage + - blk2->power_nand2_path.readOp.power_gated_leakage + - blk2->power_nand3_path.readOp.power_gated_leakage + - blk2->power_L2.readOp.power_gated_leakage; - - power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; - - power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + block_power.readOp.power_gated_leakage; - - driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + + driver_power.readOp.power_gated_leakage = + drv1->power_nand2_path.readOp.power_gated_leakage + + drv1->power_nand3_path.readOp.power_gated_leakage + + drv2->power_nand2_path.readOp.power_gated_leakage + + drv2->power_nand3_path.readOp.power_gated_leakage; + block_power.readOp.power_gated_leakage = + blk1->power_nand2_path.readOp.power_gated_leakage + + blk1->power_nand3_path.readOp.power_gated_leakage + + blk1->power_L2.readOp.power_gated_leakage + + blk2->power_nand2_path.readOp.power_gated_leakage + + blk2->power_nand3_path.readOp.power_gated_leakage + + blk2->power_L2.readOp.power_gated_leakage; + + power.readOp.leakage = + driver_power.readOp.leakage + block_power.readOp.leakage; + + power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + + block_power.readOp.power_gated_leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; 
block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } -void PredecBlkDrv::leakage_feedback(double temperature) -{ +void PredecBlkDrv::leakage_feedback(double temperature) { double leak_nand2_path = 0; double leak_nand3_path = 0; double gate_leak_nand2_path = 0; double gate_leak_nand3_path = 0; - if (flag_driver_exists) - { // first check whether a predecoder block driver is needed - for (int i = 0; i < number_gates_nand2_path; ++i) - { - leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); - gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); + if (flag_driver_exists) { // first check whether a predecoder block driver is + // needed + for (int i = 0; i < number_gates_nand2_path; ++i) { + leak_nand2_path += cmos_Isub_leakage( + width_nand2_path_n[i], width_nand2_path_p[i], 1, inv, is_dram_); + gate_leak_nand2_path += cmos_Ig_leakage( + width_nand2_path_n[i], width_nand2_path_p[i], 1, inv, is_dram_); } - leak_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - - for (int i = 0; i < number_gates_nand3_path; ++i) - { 
- leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); - gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + leak_nand2_path *= + (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= + (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + + for (int i = 0; i < number_gates_nand3_path; ++i) { + leak_nand3_path += cmos_Isub_leakage( + width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); + gate_leak_nand3_path += cmos_Ig_leakage( + width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); } - leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + leak_nand3_path *= + (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= + (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; - power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.gate_leakage = + gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = + gate_leak_nand3_path * g_tp.peri_global.Vdd; } } -double Predec::compute_delays(double inrisetime) -{ - // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. +double Predec::compute_delays(double inrisetime) { + // TODO: Jung Ho thinks that predecoder block driver locates between decoder + // and predecoder block. 
pair tmp_pair1, tmp_pair2; tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); tmp_pair1 = blk1->compute_delays(tmp_pair1); @@ -1477,28 +1370,27 @@ double Predec::compute_delays(double inrisetime) tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); driver_power.readOp.dynamic = - drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + - drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + - drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + - drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; + drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; block_power.readOp.dynamic = - blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk1->power_L2.readOp.dynamic + - blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk2->power_L2.readOp.dynamic; + blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk1->power_L2.readOp.dynamic + + blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk2->power_L2.readOp.dynamic; - power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; + power.readOp.dynamic = + driver_power.readOp.dynamic + block_power.readOp.dynamic; delay = tmp_pair1.first; - return tmp_pair1.second; + return tmp_pair1.second; } - -void Predec::leakage_feedback(double temperature) -{ +void 
Predec::leakage_feedback(double temperature) { drv1->leakage_feedback(temperature); drv2->leakage_feedback(temperature); blk1->leakage_feedback(temperature); @@ -1508,79 +1400,66 @@ void Predec::leakage_feedback(double temperature) drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + - blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + - blk2->power_L2.readOp.leakage; - power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; - - driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + power.readOp.leakage = + driver_power.readOp.leakage + block_power.readOp.leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + 
blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } // returns -pair Predec::get_max_delay_before_decoder( - pair input_pair1, - pair input_pair2) -{ +pair +Predec::get_max_delay_before_decoder(pair input_pair1, + pair input_pair2) { pair ret_val; double delay; delay = drv1->delay_nand2_path + blk1->delay_nand2_path; - ret_val.first = delay; + ret_val.first = delay; ret_val.second = input_pair1.first; delay = drv1->delay_nand3_path + blk1->delay_nand3_path; - if (ret_val.first < delay) - { - ret_val.first = delay; + if (ret_val.first < delay) { + ret_val.first = delay; ret_val.second = input_pair1.second; } delay = drv2->delay_nand2_path + blk2->delay_nand2_path; - if (ret_val.first < delay) - { - ret_val.first = delay; + if (ret_val.first < delay) { + ret_val.first = delay; ret_val.second = input_pair2.first; } delay = drv2->delay_nand3_path + blk2->delay_nand3_path; - if (ret_val.first < delay) - { - ret_val.first = delay; + if (ret_val.first < delay) { + ret_val.first = delay; ret_val.second = input_pair2.second; } return ret_val; } - - -Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram, bool power_gating_, int nodes_DSTN_) -:number_gates(0), - min_number_gates(2), - c_gate_load(c_gate_load_), - c_wire_load(c_wire_load_), - r_wire_load(r_wire_load_), - delay(0), -// power(), - is_dram_(is_dram), - total_driver_nwidth(0), - total_driver_pwidth(0), - power_gating(power_gating_), - nodes_DSTN(nodes_DSTN_), - sleeptx(NULL) -{ - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { +Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, + bool is_dram, bool power_gating_, int nodes_DSTN_) + : number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), + c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), + // 
power(), + is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0), + power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_n[i] = 0; width_p[i] = 0; } @@ -1589,95 +1468,82 @@ Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bo compute_area(); } - -void Driver::compute_widths() -{ +void Driver::compute_widths() { double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); double c_load = c_gate_load + c_wire_load; width_n[0] = g_tp.min_w_nmos_; width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); - number_gates = logical_effort( - min_number_gates, - 1, - F, - width_n, - width_p, - c_load, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); + number_gates = + logical_effort(min_number_gates, 1, F, width_n, width_p, c_load, + p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } -void Driver::compute_area() -{ +void Driver::compute_area() { double cumulative_area = 0; - area.h = g_tp.cell_h_def; - for (int i = 0; i < number_gates; i++) - { - cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); - - } + area.h = g_tp.cell_h_def; + for (int i = 0; i < number_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); + } + area.w = (cumulative_area / area.h); + if (power_gating) { + compute_power_gating(); + cumulative_area += sleeptx->area.get_area(); area.w = (cumulative_area / area.h); - if (power_gating) - { - compute_power_gating(); - cumulative_area += sleeptx->area.get_area(); - area.w = (cumulative_area / area.h); - } + } } -void Driver::compute_power_gating() -{ - //For all driver chains there is only one sleep transistors to save area - //Total transistor width for sleep tx calculation - for (int i = 0; i power_gating) - sleeptx = new Sleep_tx (g_ip->perfloss, - Isat_subarray, - is_footer, - c_wakeup, - detalV, - 
nodes_DSTN,//default is 1 for drivers - area); + // compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h); // Psleep tx + detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; + // if (g_ip->power_gating) + sleeptx = + new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, detalV, + nodes_DSTN, // default is 1 for drivers + area); } - -double Driver::compute_delay(double inrisetime) -{ - int i; +double Driver::compute_delay(double inrisetime) { + int i; double rd, c_load, c_intrinsic, tf; double this_delay = 0; - for (i = 0; i < number_gates - 1; ++i) - { + for (i = 0; i < number_gates - 1; ++i) { rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); + c_load = gate_C(width_n[i + 1] + width_p[i + 1], 0.0, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vcc_min; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; + power.readOp.dynamic += + (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.power_gated_leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + 
g_tp.peri_global.Vcc_min; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; } i = number_gates - 1; @@ -1685,15 +1551,23 @@ double Driver::compute_delay(double inrisetime) rd = tr_R_on(width_n[i], NCH, 1, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); + tf = rd * (c_intrinsic + c_load) + + r_wire_load * (c_wire_load / 2 + c_gate_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vcc_min; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; + power.readOp.dynamic += + (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.power_gated_leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vcc_min; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; return this_delay / (1.0 - 0.5); } -//TODO: add sleep tx in predec/predecblk/predecdriver +// TODO: add sleep tx in predec/predecblk/predecdriver diff --git a/cacti/decoder.h b/cacti/decoder.h index 83aefb9..b2e525e 100644 --- a/cacti/decoder.h +++ b/cacti/decoder.h @@ -29,7 +29,6 @@ * ***************************************************************************/ - #ifndef __DECODER_H__ #define __DECODER_H__ @@ -37,84 +36,67 @@ 
#include "component.h" #include "parameter.h" #include "powergating.h" + #include using namespace std; +class Decoder : public Component { +public: + Decoder(int _num_dec_signals, bool flag_way_select, double _C_ld_dec_out, + double _R_wire_dec_out, bool fully_assoc_, bool is_dram_, + bool is_wl_tr_, const Area &cell_, bool power_gating_ = false, + int nodes_DSTN_ = 1); -class Decoder : public Component -{ - public: - Decoder( - int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area & cell_, - bool power_gating_ = false, - int nodes_DSTN_ = 1); - - bool exist; - int num_in_signals; - double C_ld_dec_out; - double R_wire_dec_out; - int num_gates; - int num_gates_min; - double w_dec_n[MAX_NUMBER_GATES_STAGE]; - double w_dec_p[MAX_NUMBER_GATES_STAGE]; - double delay; - //powerDef power; - bool fully_assoc; - bool is_dram; - bool is_wl_tr; - - double total_driver_nwidth; - double total_driver_pwidth; - Sleep_tx * sleeptx; - - const Area & cell; - int nodes_DSTN; - bool power_gating; - - void compute_widths(); - void compute_area(); - double compute_delays(double inrisetime); // return outrisetime - void compute_power_gating(); - - void leakage_feedback(double temperature); - - ~Decoder() - { - if (sleeptx !=0) - delete sleeptx; - }; -}; + bool exist; + int num_in_signals; + double C_ld_dec_out; + double R_wire_dec_out; + int num_gates; + int num_gates_min; + double w_dec_n[MAX_NUMBER_GATES_STAGE]; + double w_dec_p[MAX_NUMBER_GATES_STAGE]; + double delay; + // powerDef power; + bool fully_assoc; + bool is_dram; + bool is_wl_tr; + + double total_driver_nwidth; + double total_driver_pwidth; + Sleep_tx *sleeptx; + const Area &cell; + int nodes_DSTN; + bool power_gating; + + void compute_widths(); + void compute_area(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_gating(); + void leakage_feedback(double temperature); + + 
~Decoder() { + if (sleeptx != 0) + delete sleeptx; + }; +}; -class PredecBlk : public Component -{ - public: - PredecBlk( - int num_dec_signals, - Decoder * dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); +class PredecBlk : public Component { +public: + PredecBlk(int num_dec_signals, Decoder *dec, double C_wire_predec_blk_out, + double R_wire_predec_blk_out, int num_dec_per_predec, bool is_dram_, + bool is_blk1); - Decoder * dec; + Decoder *dec; bool exist; int number_input_addr_bits; double C_ld_predec_blk_out; double R_wire_predec_blk_out; int branch_effort_nand2_gate_output; int branch_effort_nand3_gate_output; - bool flag_two_unique_paths; + bool flag_two_unique_paths; int flag_L2_gate; int number_inputs_L1_gate; int number_gates_L1_nand2_path; @@ -143,18 +125,14 @@ class PredecBlk : public Component void leakage_feedback(double temperature); - pair compute_delays(pair inrisetime); // + pair + compute_delays(pair inrisetime); // // return }; - -class PredecBlkDrv : public Component -{ - public: - PredecBlkDrv( - int way_select, - PredecBlk * blk_, - bool is_dram); +class PredecBlkDrv : public Component { +public: + PredecBlkDrv(int way_select, PredecBlk *blk_, bool is_dram); int flag_driver_exists; int number_input_addr_bits; @@ -180,102 +158,85 @@ class PredecBlkDrv : public Component powerDef power_nand2_path; powerDef power_nand3_path; - PredecBlk * blk; - Decoder * dec; - bool is_dram_; - int way_select; + PredecBlk *blk; + Decoder *dec; + bool is_dram_; + int way_select; void compute_widths(); void compute_area(); void leakage_feedback(double temperature); - pair compute_delays( double inrisetime_nand2_path, - double inrisetime_nand3_path); // return + double inrisetime_nand3_path); // return - inline int num_addr_bits_nand2_path() - { - return num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + + inline int num_addr_bits_nand2_path() { + return 
num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load; } - inline int num_addr_bits_nand3_path() - { - return num_buffers_driving_2_nand3_load + - num_buffers_driving_8_nand3_load; + inline int num_addr_bits_nand3_path() { + return num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load; } double get_rdOp_dynamic_E(int num_act_mats_hor_dir); }; +class Predec : public Component { +public: + Predec(PredecBlkDrv *drv1, PredecBlkDrv *drv2); + double compute_delays(double inrisetime); // return outrisetime -class Predec : public Component -{ - public: - Predec( - PredecBlkDrv * drv1, - PredecBlkDrv * drv2); - - double compute_delays(double inrisetime); // return outrisetime - - void leakage_feedback(double temperature); - PredecBlk * blk1; - PredecBlk * blk2; - PredecBlkDrv * drv1; - PredecBlkDrv * drv2; - - powerDef block_power; - powerDef driver_power; - - private: - // returns - pair get_max_delay_before_decoder( - pair input_pair1, - pair input_pair2); + void leakage_feedback(double temperature); + PredecBlk *blk1; + PredecBlk *blk2; + PredecBlkDrv *drv1; + PredecBlkDrv *drv2; + + powerDef block_power; + powerDef driver_power; + +private: + // returns + pair + get_max_delay_before_decoder(pair input_pair1, + pair input_pair2); }; +class Driver : public Component { +public: + Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, + bool is_dram, bool power_gating_ = false, int nodes_DSTN_ = 1); - -class Driver : public Component -{ - public: - Driver(double c_gate_load_, double c_wire_load_, - double r_wire_load_, bool is_dram, - bool power_gating_ = false, - int nodes_DSTN_ = 1 ); - - int number_gates; - int min_number_gates; + int number_gates; + int min_number_gates; double width_n[MAX_NUMBER_GATES_STAGE]; double width_p[MAX_NUMBER_GATES_STAGE]; double c_gate_load; double c_wire_load; double r_wire_load; double delay; -// powerDef power; - bool is_dram_; + // powerDef power; + bool 
is_dram_; double total_driver_nwidth; double total_driver_pwidth; - Sleep_tx * sleeptx; + Sleep_tx *sleeptx; int nodes_DSTN; - bool power_gating; + bool power_gating; - void compute_widths(); - void compute_area(); + void compute_widths(); + void compute_area(); double compute_delay(double inrisetime); - void compute_power_gating(); + void compute_power_gating(); - ~Driver() - { - if (sleeptx !=0) - delete sleeptx; + ~Driver() { + if (sleeptx != 0) + delete sleeptx; }; }; - #endif diff --git a/cacti/htree2.cc b/cacti/htree2.cc index 5d71c93..aa2c8e8 100644 --- a/cacti/htree2.cc +++ b/cacti/htree2.cc @@ -29,43 +29,43 @@ * ***************************************************************************/ - - #include "htree2.h" + #include "wire.h" + #include #include -Htree2::Htree2( - enum Wire_type wire_model, double mat_w, double mat_h, - int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type, - bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt) - :in_rise_time(0), out_rise_time(0), - tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), - add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits), - search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), - uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt) -{ +Htree2::Htree2(enum Wire_type wire_model, double mat_w, double mat_h, + int a_bits, int d_inbits, int search_data_in, int d_outbits, + int search_data_out, int bl, int wl, enum Htree_type htree_type, + bool uca_tree_, bool search_tree_, + TechnologyParameter::DeviceType *dt) + : in_rise_time(0), out_rise_time(0), tree_type(htree_type), + mat_width(mat_w), mat_height(mat_h), add_bits(a_bits), + data_in_bits(d_inbits), search_data_in_bits(search_data_in), + data_out_bits(d_outbits), search_data_out_bits(search_data_out), ndbl(bl), + ndwl(wl), uca_tree(uca_tree_), search_tree(search_tree_), 
wt(wire_model), + deviceType(dt) { assert(ndbl >= 2 && ndwl >= 2); -// if (ndbl == 1 && ndwl == 1) -// { -// delay = 0; -// power.readOp.dynamic = 0; -// power.readOp.leakage = 0; -// area.w = mat_w; -// area.h = mat_h; -// return; -// } -// if (ndwl == 1) ndwl++; -// if (ndbl == 1) ndbl++; - - max_unpipelined_link_delay = 0; //TODO + // if (ndbl == 1 && ndwl == 1) + // { + // delay = 0; + // power.readOp.dynamic = 0; + // power.readOp.leakage = 0; + // area.w = mat_w; + // area.h = mat_h; + // return; + // } + // if (ndwl == 1) ndwl++; + // if (ndbl == 1) ndbl++; + + max_unpipelined_link_delay = 0; // TODO min_w_nmos = g_tp.min_w_nmos_; min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - switch (htree_type) - { + switch (htree_type) { case Add_htree: wire_bw = init_wire_bw = add_bits; in_htree(); @@ -79,7 +79,8 @@ Htree2::Htree2( out_htree(); break; case Search_in_htree: - wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. + wire_bw = init_wire_bw = search_data_in_bits; // in_search_tree is broad + // cast, out_htree is not. in_htree(); break; case Search_out_htree: @@ -98,152 +99,215 @@ Htree2::Htree2( assert(power.readOp.leakage >= 0); } - - // nand gate sizing calculation -void Htree2::input_nand(double s1, double s2, double l_eff) -{ +void Htree2::input_nand(double s1, double s2, double l_eff) { Wire w1(wt, l_eff); double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; // input capacitance of a repeater = input capacitance of nand. - double nsize = s1*(1 + pton_size)/(2 + pton_size); + double nsize = s1 * (1 + pton_size) / (2 + pton_size); nsize = (nsize < 1) ? 
1 : nsize; - double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) * - (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0)); - delay+= horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - power.readOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; - - power.searchOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd * wire_bw ; - power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; - power.readOp.power_gated_leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vcc_min; - power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; + double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) * + (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)); + delay += horowitz(w1.out_rise_time, tc, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); + power.readOp.dynamic += + 0.5 * + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, + g_tp.cell_h_def) + + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += + 0.5 * + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, + g_tp.cell_h_def) + + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + 
deviceType->Vdd * deviceType->Vdd * wire_bw; + power.readOp.leakage += + (wire_bw * cmos_Isub_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, nand)) * + deviceType->Vdd; + power.readOp.power_gated_leakage += + (wire_bw * cmos_Isub_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, nand)) * + deviceType->Vcc_min; + power.readOp.gate_leakage += + (wire_bw * cmos_Ig_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, nand)) * + deviceType->Vdd; } - - // tristate buffer model consisting of not, nand, nor, and driver transistors -void Htree2::output_buffer(double s1, double s2, double l_eff) -{ +void Htree2::output_buffer(double s1, double s2, double l_eff) { Wire w1(wt, l_eff); double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; // input capacitance of repeater = input capacitance of nand + nor. - double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - double s_eff = //stage eff of a repeater in a wire - (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/ - gate_C(s2*(min_w_nmos + min_w_pmos), 0); - double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0)); + double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + double s_eff = // stage eff of a repeater in a wire + (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + + w1.wire_cap(l_eff * 1e-6, true)) / + gate_C(s2 * (min_w_nmos + min_w_pmos), 0); + double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 / + (s_eff * gate_C(min_w_pmos, 0)); size = (size < 1) ? 
1 : size; - double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1); - double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1); - double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(tr_size*min_w_pmos, 0); - double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + - gate_C(s1*(min_w_nmos + min_w_pmos), 0); + double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1); + double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1); + double cap_nand_out = + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(tr_size * min_w_pmos, 0); + double cap_ptrans_out = + 2 * (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0); double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; - - delay += horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - - //nand - power.readOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; - - power.searchOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //not - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; - - power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, 
g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //nor - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; - - power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //output transistor - power.readOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; - - power.searchOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - if(uca_tree) { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vcc_min*wire_bw;/*inverter + output tr*/ - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vcc_min*wire_bw;//nand - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, 
nor)*deviceType->Vcc_min*wire_bw;//nor - - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=; - } - else { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vcc_min*wire_bw;/*inverter + output tr*/ - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vcc_min*wire_bw;//nand - power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vcc_min*wire_bw;//nor - - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; + delay += horowitz(w1.out_rise_time, tc, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); + + // nand + power.readOp.dynamic += + 0.5 * + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 
drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += + 0.5 * + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + // not + power.readOp.dynamic += + 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += + 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + // nor + power.readOp.dynamic += + 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += + 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + // output transistor + power.readOp.dynamic += + 0.5 * + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * + 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += + 0.5 * + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * + 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + if (uca_tree) { + 
power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) * + deviceType->Vdd * wire_bw; /*inverter + output tr*/ + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor + + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) * + deviceType->Vcc_min * wire_bw; /*inverter + output tr*/ + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * + deviceType->Vcc_min * wire_bw; // nand + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * + deviceType->Vcc_min * wire_bw; // nor + + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) * + deviceType->Vdd * wire_bw; /*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor + // power.readOp.gate_leakage *=; + } else { + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) * + deviceType->Vdd * wire_bw; /*inverter + output tr*/ + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor + + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) 
* + deviceType->Vcc_min * wire_bw; /*inverter + output tr*/ + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * + deviceType->Vcc_min * wire_bw; // nand + power.readOp.power_gated_leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * + deviceType->Vcc_min * wire_bw; // nor + + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, + inv) * + deviceType->Vdd * wire_bw; /*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor + // power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; } } - - /* calculates the input h-tree delay/power * A nand gate is used at each node to * limit the signal @@ -258,109 +322,122 @@ void Htree2::output_buffer(double s1, double s2, double l_eff) * hor. links left. After this it goes through the remaining vertical * links. */ - void -Htree2::in_htree() -{ - //temp var +void Htree2::in_htree() { + // temp var double s1 = 0, s2 = 0, s3 = 0; double l_eff = 0; Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; double len = 0, ht = 0; int option = 0; - int h = (int) _log2(ndwl/2); // horizontal nodes - int v = (int) _log2(ndbl/2); // vertical nodes + int h = (int)_log2(ndwl / 2); // horizontal nodes + int v = (int)_log2(ndbl / 2); // vertical nodes double len_temp; double ht_temp; - if (uca_tree) - {//Sheng: this computation do not consider the wires that route from edge to middle. 
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { + if (uca_tree) { // Sheng: this computation do not consider the wires that + // route from edge to middle. + ht_temp = (mat_height * ndbl / 2 + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, h)))) / + 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, v)))) / + 2; + } else { if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else if (ndwl > ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits 
+ data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / + 2; + len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / + 2; + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + +(search_data_in_bits + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / + 2; + len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / + 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / + 2; 
+ len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / + 2; } } - area.h = ht_temp * 2; - area.w = len_temp * 2; + area.h = ht_temp * 2; + area.w = len_temp * 2; delay = 0; power.readOp.dynamic = 0; power.readOp.leakage = 0; power.readOp.power_gated_leakage = 0; - power.searchOp.dynamic =0; + power.searchOp.dynamic = 0; len = len_temp; - ht = ht_temp/2; - - while (v > 0 || h > 0) - { - if (wtemp1) delete wtemp1; - if (wtemp2) delete wtemp2; - if (wtemp3) delete wtemp3; - - if (h > v) - { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, len/2); // ver + ht = ht_temp / 2; + + while (v > 0 || h > 0) { + if (wtemp1) + delete wtemp1; + if (wtemp2) + delete wtemp2; + if (wtemp3) + delete wtemp3; + + if (h > v) { + // the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver len_temp = len; len /= 2; wtemp3 = 0; h--; option = 0; - } - else if (v>0 && h>0) - { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // next hor + } else if (v > 0 && h > 0) { + // considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor len_temp = len; ht_temp = ht; len /= 2; - ht /= 2; + ht /= 2; v--; h--; option = 1; - } - else - { + } else { // considers only one vertical link assert(h == 0); - wtemp1 = new Wire(wt, ht); // ver - wtemp2 = new Wire(wt, ht/2); // hor + wtemp1 = new Wire(wt, ht); // ver + wtemp2 = new Wire(wt, ht / 2); // hor ht_temp = ht; ht /= 2; wtemp3 = 0; @@ -370,75 +447,63 @@ Htree2::in_htree() delay += 
wtemp1->delay; power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.power_gated_leakage += wtemp1->power.readOp.power_gated_leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - if ((uca_tree == false && option == 2) || search_tree==true) - { - wire_bw*=2; // wire bandwidth doubles only for vertical branches + power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.power_gated_leakage += + wtemp1->power.readOp.power_gated_leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + if ((uca_tree == false && option == 2) || search_tree == true) { + wire_bw *= 2; // wire bandwidth doubles only for vertical branches } - if (uca_tree == false) - { - if (len_temp > wtemp1->repeater_spacing) - { + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { s1 = wtemp1->repeater_size; l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * wtemp1->repeater_size; l_eff = len_temp; } - if (ht_temp > wtemp2->repeater_spacing) - { + if (ht_temp > wtemp2->repeater_spacing) { s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * wtemp2->repeater_size; } // first level input_nand(s1, s2, l_eff); } - - if (option != 1) - { + if (option != 1) { continue; } // second level delay += wtemp2->delay; power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.power_gated_leakage += 
wtemp2->power.readOp.power_gated_leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - - if (uca_tree) - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { - s3 = wtemp3->repeater_size; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.power_gated_leakage += + wtemp2->power.readOp.power_gated_leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + + if (uca_tree) { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.power_gated_leakage += + (wtemp2->power.readOp.power_gated_leakage * wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.power_gated_leakage += + (wtemp2->power.readOp.power_gated_leakage * wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { + s3 = wtemp3->repeater_size; l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * wtemp3->repeater_size; l_eff = ht_temp; } @@ -446,13 +511,14 @@ Htree2::in_htree() } } - if (wtemp1) delete wtemp1; - if (wtemp2) delete wtemp2; - if (wtemp3) delete wtemp3; + if (wtemp1) + delete wtemp1; + if 
(wtemp2) + delete wtemp2; + if (wtemp3) + delete wtemp3; } - - /* a tristate buffer is used to handle fan-ins * The area of an unbalanced htree (rows != columns) * depends on how data is traversed. @@ -465,59 +531,75 @@ Htree2::in_htree() * hor. links left. After this it goes through the remaining vertical * links. */ -void Htree2::out_htree() -{ - //temp var +void Htree2::out_htree() { + // temp var double s1 = 0, s2 = 0, s3 = 0; double l_eff = 0; Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; double len = 0, ht = 0; int option = 0; - int h = (int) _log2(ndwl/2); - int v = (int) _log2(ndbl/2); + int h = (int)_log2(ndwl / 2); + int v = (int)_log2(ndbl / 2); double len_temp; double ht_temp; - if (uca_tree) - { - ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { + if (uca_tree) { + ht_temp = (mat_height * ndbl / 2 + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, h)))) / + 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, v)))) / + 2; + } else { if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * 
(ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - - } - else if (ndwl > ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / + 2; + len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / + 2; + + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * 
g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / + 2; + len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / + 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = + ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / + 2; + len_temp = + (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / + 2; } } area.h = ht_temp * 2; @@ -527,31 +609,32 @@ void Htree2::out_htree() power.readOp.leakage = 0; power.readOp.power_gated_leakage = 0; power.readOp.gate_leakage = 0; - //cout<<"power.readOp.gate_leakage"< 0 || h > 0) - { //finds delay/power of each link in the tree - if (wtemp1) delete wtemp1; - if (wtemp2) delete wtemp2; - if (wtemp3) delete wtemp3; - - if(h > v) { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, len/2); // ver + ht = ht_temp / 2; + + while (v > 0 || h > 0) { // finds delay/power of each link in the tree + if (wtemp1) + delete wtemp1; + if (wtemp2) + delete wtemp2; + if (wtemp3) + delete wtemp3; + + if (h > v) { + // the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver len_temp = len; len /= 2; wtemp3 = 0; h--; option = 0; - } - else if (v>0 && h>0) { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // 
next hor + } else if (v > 0 && h > 0) { + // considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor len_temp = len; ht_temp = ht; len /= 2; @@ -559,12 +642,11 @@ void Htree2::out_htree() v--; h--; option = 1; - } - else { + } else { // considers only one vertical link assert(h == 0); - wtemp1 = new Wire(wt, ht); // hor - wtemp2 = new Wire(wt, ht/2); // ver + wtemp1 = new Wire(wt, ht); // hor + wtemp2 = new Wire(wt, ht / 2); // ver ht_temp = ht; ht /= 2; wtemp3 = 0; @@ -573,87 +655,77 @@ void Htree2::out_htree() } delay += wtemp1->delay; power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.power_gated_leakage += wtemp1->power.readOp.power_gated_leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"<power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.power_gated_leakage += + wtemp1->power.readOp.power_gated_leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + // cout<<"power.readOp.gate_leakage"< wtemp1->repeater_spacing) - { + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { s1 = wtemp1->repeater_size; l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * wtemp1->repeater_size; l_eff = len_temp; } - if (ht_temp > wtemp2->repeater_spacing) - { + if (ht_temp > wtemp2->repeater_spacing) { s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * wtemp2->repeater_size; } // first 
level output_buffer(s1, s2, l_eff); } - - if (option != 1) - { + if (option != 1) { continue; } // second level delay += wtemp2->delay; power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.power_gated_leakage += wtemp2->power.readOp.power_gated_leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"<power.readOp.leakage*wire_bw); - power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.power_gated_leakage += + wtemp2->power.readOp.power_gated_leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + // cout<<"power.readOp.gate_leakage"<power.readOp.leakage * wire_bw); + power.readOp.power_gated_leakage += + (wtemp2->power.readOp.power_gated_leakage * wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.power_gated_leakage += + (wtemp2->power.readOp.power_gated_leakage * wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { s3 = wtemp3->repeater_size; l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = 
(len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * wtemp3->repeater_size; l_eff = ht_temp; } output_buffer(s2, s3, l_eff); } - //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage<power.readOp.gate_leakage"<power.readOp.gate_leakage< #include #include #include - -#include "io.h" -#include "area.h" -#include "basic_circuit.h" -#include "parameter.h" -#include "Ucache.h" -#include "nuca.h" -#include "crossbar.h" -#include "arbiter.h" -#include "version_cacti.h" //#include "highradix.h" using namespace std; - InputParameter::InputParameter() -: array_power_gated(false), - bitline_floating(false), - wl_power_gated(false), - cl_power_gated(false), - interconect_power_gated(false), - power_gating(false), - perfloss(0.01), - cl_vertical (true), - long_channel_device(false) -{ - dvs_voltage = std::vector(0); + : array_power_gated(false), bitline_floating(false), wl_power_gated(false), + cl_power_gated(false), interconect_power_gated(false), + power_gating(false), perfloss(0.01), cl_vertical(true), + long_channel_device(false) { + dvs_voltage = std::vector(0); } /* Parses "cache.cfg" file */ - void -InputParameter::parse_cfg(const string & in_file) -{ +void InputParameter::parse_cfg(const string &in_file) { FILE *fp = fopen(in_file.c_str(), "r"); char line[5000]; char jk[5000]; char temp_var[5000]; double temp_double; char *data = line; - int offset= 0; + int offset = 0; - if(!fp) { + if (!fp) { cout << in_file << " is missing!\n"; exit(-1); } - while(fscanf(fp, "%[^\n]\n", line) != EOF) { + while (fscanf(fp, "%[^\n]\n", line) != EOF) { if (!strncmp("-size", line, strlen("-size"))) { sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz)); @@ -98,7 +88,8 @@ InputParameter::parse_cfg(const string & in_file) continue; } - if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) { + if (!strncmp("-internal prefetch width", line, + strlen("-internal prefetch 
width"))) { sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w)); continue; } @@ -123,14 +114,13 @@ InputParameter::parse_cfg(const string & in_file) continue; } - if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) { + if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) { sscanf(line, "-exclusive write port %d", &(num_wr_ports)); continue; } if (!strncmp("-single ended", line, strlen("-single ended"))) { - sscanf(line, "-single %[(:-~)*]%d", jk, - &(num_se_rd_ports)); + sscanf(line, "-single %[(:-~)*]%d", jk, &(num_se_rd_ports)); continue; } @@ -146,7 +136,7 @@ InputParameter::parse_cfg(const string & in_file) if (!strncmp("-technology", line, strlen("-technology"))) { sscanf(line, "-technology (u) %lf", &(F_sz_um)); - F_sz_nm = F_sz_um*1000; + F_sz_nm = F_sz_um * 1000; continue; } @@ -155,11 +145,11 @@ InputParameter::parse_cfg(const string & in_file) if (!strncmp("default", temp_var, sizeof("default"))) { specific_hp_vdd = false; hp_Vdd = 1.0; /* - * if this is by default, then the vdd value in g_ip here does not matter - */ - } - else { - specific_hp_vdd = true; + * if this is by default, then the vdd value in g_ip here + * does not matter + */ + } else { + specific_hp_vdd = true; sscanf(line, "-hp Vdd (V) %lf", &(hp_Vdd)); } continue; @@ -170,11 +160,11 @@ InputParameter::parse_cfg(const string & in_file) if (!strncmp("default", temp_var, sizeof("default"))) { specific_lstp_vdd = false; lstp_Vdd = 1.0; /* - * if this is by default, then the vdd value in g_ip here does not matter - */ - } - else { - specific_lstp_vdd = true; + * if this is by default, then the vdd value in g_ip + * here does not matter + */ + } else { + specific_lstp_vdd = true; sscanf(line, "-lstp Vdd (V) %lf", &(lstp_Vdd)); } continue; @@ -185,48 +175,49 @@ InputParameter::parse_cfg(const string & in_file) if (!strncmp("default", temp_var, sizeof("default"))) { specific_lop_vdd = false; lop_Vdd = 1.0; /* - * if this is by default, then the vdd 
value in g_ip here does not matter - */ - } - else { - specific_lop_vdd = true; + * if this is by default, then the vdd value in g_ip here + * does not matter + */ + } else { + specific_lop_vdd = true; sscanf(line, "-lop Vdd (V) %lf", &(lop_Vdd)); } continue; } if (!strncmp("-DVS(V):", line, strlen("-DVS(V):"))) { - memmove (line,line+9,strlen(line)); - while (1 == sscanf(data, "%lf%n", &temp_double, &offset)) { - data += offset; - dvs_voltage.push_back(temp_double); - } -// dvs_levels = dvs_voltage.size(); - continue; + memmove(line, line + 9, strlen(line)); + while (1 == sscanf(data, "%lf%n", &temp_double, &offset)) { + data += offset; + dvs_voltage.push_back(temp_double); + } + // dvs_levels = dvs_voltage.size(); + continue; } - if (!strncmp("-Powergating voltage", line, strlen("-Powergating voltage"))) { + if (!strncmp("-Powergating voltage", line, + strlen("-Powergating voltage"))) { sscanf(line, "-Powergating voltage%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("default", temp_var, sizeof("default"))) { - specific_vcc_min= false; + specific_vcc_min = false; user_defined_vcc_min = 1.0; /* - * if this is by default, then the vdd value in g_ip here does not matter - */ - } - else { - specific_vcc_min = true; + * if this is by default, then the vdd value + * in g_ip here does not matter + */ + } else { + specific_vcc_min = true; sscanf(line, "-Powergating voltage (V) %lf", &(user_defined_vcc_min)); } continue; } - if (!strncmp("-output/input", line, strlen("-output/input"))) { sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w)); continue; } - if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) { + if (!strncmp("-operating temperature", line, + strlen("-operating temperature"))) { sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp)); continue; } @@ -236,49 +227,43 @@ InputParameter::parse_cfg(const string & in_file) if (!strncmp("cache", temp_var, sizeof("cache"))) { is_cache = true; - } - else - { + } else { 
is_cache = false; } if (!strncmp("main memory", temp_var, sizeof("main memory"))) { is_main_mem = true; - } - else { + } else { is_main_mem = false; } if (!strncmp("cam", temp_var, sizeof("cam"))) { pure_cam = true; - } - else { + } else { pure_cam = false; } if (!strncmp("ram", temp_var, sizeof("ram"))) { pure_ram = true; - } - else { - if (!is_main_mem) - pure_ram = false; - else - pure_ram = true; + } else { + if (!is_main_mem) + pure_ram = false; + else + pure_ram = true; } continue; } - if (!strncmp("-tag size", line, strlen("-tag size"))) { sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("default", temp_var, sizeof("default"))) { specific_tag = false; - tag_w = 42; /* the acutal value is calculated - * later based on the cache size, bank count, and associativity - */ - } - else { + tag_w = + 42; /* the acutal value is calculated + * later based on the cache size, bank count, and associativity + */ + } else { specific_tag = true; sscanf(line, "-tag size (b) %d", &(tag_w)); } @@ -289,166 +274,145 @@ InputParameter::parse_cfg(const string & in_file) sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("fast", temp_var, strlen("fast"))) { access_mode = 2; - } - else if (!strncmp("sequential", temp_var, strlen("sequential"))) { + } else if (!strncmp("sequential", temp_var, strlen("sequential"))) { access_mode = 1; - } - else if(!strncmp("normal", temp_var, strlen("normal"))) { + } else if (!strncmp("normal", temp_var, strlen("normal"))) { access_mode = 0; - } - else { + } else { cout << "ERROR: Invalid access mode!\n"; exit(0); } continue; } - if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) { + if (!strncmp("-Data array cell type", line, + strlen("-Data array cell type"))) { sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var); - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { data_arr_ram_cell_tech_type = 0; - } - 
else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { data_arr_ram_cell_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { data_arr_ram_cell_tech_type = 2; - } - else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { + } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { data_arr_ram_cell_tech_type = 3; - } - else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { + } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { data_arr_ram_cell_tech_type = 4; - } - else { + } else { cout << "ERROR: Invalid type!\n"; exit(0); } continue; } - if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) { - sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("-Data array peripheral type", line, + strlen("-Data array peripheral type"))) { + sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, + temp_var); - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { data_arr_peri_global_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { data_arr_peri_global_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { data_arr_peri_global_tech_type = 2; - } - else { + } else { cout << "ERROR: Invalid type!\n"; exit(0); } continue; } - if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) { + if (!strncmp("-Tag array cell type", line, + strlen("-Tag array cell type"))) { sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var); - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + if (!strncmp("itrs-hp", 
temp_var, strlen("itrs-hp"))) { tag_arr_ram_cell_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { tag_arr_ram_cell_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { tag_arr_ram_cell_tech_type = 2; - } - else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { + } else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { tag_arr_ram_cell_tech_type = 3; - } - else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { + } else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { tag_arr_ram_cell_tech_type = 4; - } - else { + } else { cout << "ERROR: Invalid type!\n"; exit(0); } continue; } - if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) { + if (!strncmp("-Tag array peripheral type", line, + strlen("-Tag array peripheral type"))) { sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { tag_arr_peri_global_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + } else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { tag_arr_peri_global_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + } else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { tag_arr_peri_global_tech_type = 2; - } - else { + } else { cout << "ERROR: Invalid type!\n"; exit(0); } continue; } - if(!strncmp("-design", line, strlen("-design"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_wt), &(dynamic_power_wt), - &(leakage_power_wt), - &(cycle_time_wt), &(area_wt)); + if (!strncmp("-design", line, strlen("-design"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_wt), + &(dynamic_power_wt), 
&(leakage_power_wt), &(cycle_time_wt), + &(area_wt)); continue; } - if(!strncmp("-deviate", line, strlen("-deviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_dev), &(dynamic_power_dev), - &(leakage_power_dev), - &(cycle_time_dev), &(area_dev)); + if (!strncmp("-deviate", line, strlen("-deviate"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_dev), + &(dynamic_power_dev), &(leakage_power_dev), &(cycle_time_dev), + &(area_dev)); continue; } - if(!strncmp("-Optimize", line, strlen("-Optimize"))) { + if (!strncmp("-Optimize", line, strlen("-Optimize"))) { sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var); - if(!strncmp("ED^2", temp_var, strlen("ED^2"))) { + if (!strncmp("ED^2", temp_var, strlen("ED^2"))) { ed = 2; - } - else if(!strncmp("ED", temp_var, strlen("ED"))) { + } else if (!strncmp("ED", temp_var, strlen("ED"))) { ed = 1; - } - else { + } else { ed = 0; } } - if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_wt_nuca), &(dynamic_power_wt_nuca), - &(leakage_power_wt_nuca), - &(cycle_time_wt_nuca), &(area_wt_nuca)); + if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_wt_nuca), + &(dynamic_power_wt_nuca), &(leakage_power_wt_nuca), + &(cycle_time_wt_nuca), &(area_wt_nuca)); continue; } - if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_dev_nuca), &(dynamic_power_dev_nuca), - &(leakage_power_dev_nuca), - &(cycle_time_dev_nuca), &(area_dev_nuca)); + if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_dev_nuca), + &(dynamic_power_dev_nuca), &(leakage_power_dev_nuca), + &(cycle_time_dev_nuca), &(area_dev_nuca)); continue; } - if(!strncmp("-Cache model", line, strlen("-cache model"))) { + if (!strncmp("-Cache model", line, 
strlen("-cache model"))) { sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("UCA", temp_var, strlen("UCA"))) { nuca = 0; - } - else { + } else { nuca = 1; } continue; } - if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) { + if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) { sscanf(line, "-NUCA bank count %d", &(nuca_bank_count)); if (nuca_bank_count != 0) { @@ -457,84 +421,73 @@ InputParameter::parse_cfg(const string & in_file) continue; } - if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) { + if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) { sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("global", temp_var, strlen("global"))) { wire_is_mat_type = 2; continue; - } - else if (!strncmp("local", temp_var, strlen("local"))) { + } else if (!strncmp("local", temp_var, strlen("local"))) { wire_is_mat_type = 0; continue; - } - else { + } else { wire_is_mat_type = 1; continue; } } - if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) { + if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) { sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("global", temp_var, strlen("global"))) { wire_os_mat_type = 2; - } - else { + } else { wire_os_mat_type = 1; } continue; } - if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) { + if (!strncmp("-Interconnect projection", line, + strlen("-Interconnect projection"))) { sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("aggressive", temp_var, strlen("aggressive"))) { ic_proj_type = 0; - } - else { + } else { ic_proj_type = 1; } continue; } - if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) { + if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) { sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("default", temp_var, strlen("default"))) { force_wiretype = 0; wt = Global; - } - else if 
(!(strncmp("Global_10", temp_var, strlen("Global_10")))) { + } else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) { force_wiretype = 1; wt = Global_10; - } - else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) { + } else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) { force_wiretype = 1; wt = Global_20; - } - else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) { + } else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) { force_wiretype = 1; wt = Global_30; - } - else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) { + } else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) { force_wiretype = 1; wt = Global_5; - } - else if (!(strncmp("Global", temp_var, strlen("Global")))) { + } else if (!(strncmp("Global", temp_var, strlen("Global")))) { force_wiretype = 1; wt = Global; - } - else { + } else { wt = Low_swing; force_wiretype = 1; } continue; } - - - if(!strncmp("-Core", line, strlen("-Core"))) { + if (!strncmp("-Core", line, strlen("-Core"))) { sscanf(line, "-Core count %d\n", &(cores)); if (cores > 16) { printf("No. 
of cores should be less than 16!\n"); @@ -542,174 +495,161 @@ InputParameter::parse_cfg(const string & in_file) continue; } - if(!strncmp("-Cache level", line, strlen("-Cache level"))) { + if (!strncmp("-Cache level", line, strlen("-Cache level"))) { sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("L2", temp_var, strlen("L2"))) { cache_level = 0; - } - else { + } else { cache_level = 1; } } - if(!strncmp("-Print level", line, strlen("-Print level"))) { + if (!strncmp("-Print level", line, strlen("-Print level"))) { sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { print_detail = 1; - } - else { + } else { print_detail = 0; } - } - if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) { + if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) { sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { add_ecc_b_ = true; - } - else { + } else { add_ecc_b_ = false; } } - if(!strncmp("-CLDriver vertical", line, strlen("-CLDriver vertical"))) { - sscanf(line, "-CLDriver vertical %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - cl_vertical = true; - } - else { - cl_vertical = false; - } - } - - if(!strncmp("-Array Power Gating", line, strlen("-Array Power Gating"))) { - sscanf(line, "-Array Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - array_power_gated = true; - } - else { - array_power_gated = false; - } - } - - if(!strncmp("-Bitline floating", line, strlen("-Bitline floating"))) { - sscanf(line, "-Bitline floating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - bitline_floating = true; - } - else { - bitline_floating = false; - } - } - - if(!strncmp("-WL Power Gating", line, strlen("-WL Power Gating"))) { - sscanf(line, "-WL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, 
strlen("true"))) { - wl_power_gated = true; - } - else { - wl_power_gated = false; - } - } - - if(!strncmp("-CL Power Gating", line, strlen("-CL Power Gating"))) { - sscanf(line, "-CL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - cl_power_gated = true; - } - else { - cl_power_gated = false; - } - } - - if(!strncmp("-Interconnect Power Gating", line, strlen("-Interconnect Power Gating"))) { - sscanf(line, "-Interconnect Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - interconect_power_gated = true; - } - else { - interconect_power_gated = false; - } - } - - if(!strncmp("-Power Gating Performance Loss", line, strlen("-Power Gating Performance Loss"))) { - sscanf(line, "-Power Gating Performance Loss %lf", &(perfloss)); - continue; + if (!strncmp("-CLDriver vertical", line, strlen("-CLDriver vertical"))) { + sscanf(line, "-CLDriver vertical %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + cl_vertical = true; + } else { + cl_vertical = false; + } } - if(!strncmp("-Power Gating", line, strlen("-Power Gating"))) { - sscanf(line, "-Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - power_gating = true; - } - else { - power_gating = false; - } + if (!strncmp("-Array Power Gating", line, strlen("-Array Power Gating"))) { + sscanf(line, "-Array Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + array_power_gated = true; + } else { + array_power_gated = false; + } } - if(!strncmp("-Long channel devices", line, strlen("-Long channel devices"))) { - sscanf(line, "-Long channel devices %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("-Bitline floating", line, strlen("-Bitline floating"))) { + sscanf(line, "-Bitline floating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + bitline_floating = true; 
+ } else { + bitline_floating = false; + } + } + + if (!strncmp("-WL Power Gating", line, strlen("-WL Power Gating"))) { + sscanf(line, "-WL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + wl_power_gated = true; + } else { + wl_power_gated = false; + } + } + + if (!strncmp("-CL Power Gating", line, strlen("-CL Power Gating"))) { + sscanf(line, "-CL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + cl_power_gated = true; + } else { + cl_power_gated = false; + } + } + + if (!strncmp("-Interconnect Power Gating", line, + strlen("-Interconnect Power Gating"))) { + sscanf(line, "-Interconnect Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + interconect_power_gated = true; + } else { + interconect_power_gated = false; + } + } + + if (!strncmp("-Power Gating Performance Loss", line, + strlen("-Power Gating Performance Loss"))) { + sscanf(line, "-Power Gating Performance Loss %lf", &(perfloss)); + continue; + } + + if (!strncmp("-Power Gating", line, strlen("-Power Gating"))) { + sscanf(line, "-Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { - long_channel_device = true; + power_gating = true; + } else { + power_gating = false; } - else { - long_channel_device = false; + } + + if (!strncmp("-Long channel devices", line, + strlen("-Long channel devices"))) { + sscanf(line, "-Long channel devices %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + long_channel_device = true; + } else { + long_channel_device = false; } } - if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { + if (!strncmp("-Print input parameters", line, + strlen("-Print input parameters"))) { sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { print_input_args = true; - } - else { + 
} else { print_input_args = false; } } - if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) { + if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) { sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { force_cache_config = true; - } - else { + } else { force_cache_config = false; } } - if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) { + if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) { sscanf(line, "-Ndbl %d\n", &(ndbl)); continue; } - if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) { + if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) { sscanf(line, "-Ndwl %d\n", &(ndwl)); continue; } - if(!strncmp("-Nspd", line, strlen("-Nspd"))) { + if (!strncmp("-Nspd", line, strlen("-Nspd"))) { sscanf(line, "-Nspd %d\n", &(nspd)); continue; } - if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { + if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); continue; } - if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { + if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); continue; } - if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) { + if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) { sscanf(line, "-Ndcm %d\n", &(ndcm)); continue; } - } rpters_in_htree = true; fclose(fp); } - void -InputParameter::display_ip() -{ +void InputParameter::display_ip() { cout << "Cache size : " << cache_sz << endl; cout << "Block size : " << line_sz << endl; cout << "Associativity : " << assoc << endl; @@ -717,64 +657,67 @@ InputParameter::display_ip() cout << "Write only ports : " << num_wr_ports << endl; cout << "Read write ports : " << num_rw_ports << endl; cout << "Single ended read ports : " << num_se_rd_ports << endl; - if (fully_assoc||pure_cam) - { - cout << "Search ports : " << num_search_ports << endl; + if (fully_assoc || pure_cam) { + cout << "Search ports : " << num_search_ports << endl; } cout << "Cache banks (UCA) : " << 
nbanks << endl; cout << "Technology : " << F_sz_um << endl; - cout << "User specified HP Vdd (v)? : " << std::boolalpha << specific_hp_vdd << endl; - if (specific_hp_vdd) - { - cout << "User defined HP Vdd (v) : " << hp_Vdd << endl; + cout << "User specified HP Vdd (v)? : " << std::boolalpha + << specific_hp_vdd << endl; + if (specific_hp_vdd) { + cout << "User defined HP Vdd (v) : " << hp_Vdd << endl; } - cout << "User specified LSTP Vdd (v)? : " << std::boolalpha << specific_lstp_vdd << endl; - if (specific_lstp_vdd) - { - cout << "User defined HP Vdd (v) : " << lstp_Vdd << endl; + cout << "User specified LSTP Vdd (v)? : " << std::boolalpha + << specific_lstp_vdd << endl; + if (specific_lstp_vdd) { + cout << "User defined HP Vdd (v) : " << lstp_Vdd << endl; } - cout << "User specified LOP Vdd (v)? : " << std::boolalpha << specific_lop_vdd << endl; - if (specific_lop_vdd) - { - cout << "User defined HP Vdd (v) : " << lop_Vdd << endl; + cout << "User specified LOP Vdd (v)? : " << std::boolalpha + << specific_lop_vdd << endl; + if (specific_lop_vdd) { + cout << "User defined HP Vdd (v) : " << lop_Vdd << endl; } cout << "Temperature : " << temp << endl; cout << "Tag size : " << tag_w << endl; if (is_cache) { - cout << "array type : " << "Cache" << endl; + cout << "array type : " + << "Cache" << endl; } if (pure_ram) { - cout << "array type : " << "Scratch RAM" << endl; + cout << "array type : " + << "Scratch RAM" << endl; } - if (pure_cam) - { - cout << "array type : " << "CAM" << endl; + if (pure_cam) { + cout << "array type : " + << "CAM" << endl; } cout << "Model as memory : " << is_main_mem << endl; cout << "Access mode : " << access_mode << endl; - cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; - cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; - cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; - cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; + 
cout << "Data array cell type : " << data_arr_ram_cell_tech_type + << endl; + cout << "Data array peripheral type : " << data_arr_peri_global_tech_type + << endl; + cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type + << endl; + cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type + << endl; cout << "Optimization target : " << ed << endl; cout << "Design objective (UCA wt) : " << delay_wt << " " - << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt - << " " << area_wt << endl; + << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt + << " " << area_wt << endl; cout << "Design objective (UCA dev) : " << delay_dev << " " - << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev - << " " << area_dev << endl; - if (nuca) - { + << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev + << " " << area_dev << endl; + if (nuca) { cout << "Cores : " << cores << endl; - cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " - << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca - << " " << area_wt_nuca << endl; + << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " + << cycle_time_wt_nuca << " " << area_wt_nuca << endl; cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " - << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca - << " " << area_dev_nuca << endl; - } + << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " + << cycle_time_dev_nuca << " " << area_dev_nuca << endl; + } cout << "Cache model : " << nuca << endl; cout << "Nuca bank : " << nuca_bank_count << endl; cout << "Wire inside mat : " << wire_is_mat_type << endl; @@ -786,7 +729,8 @@ InputParameter::display_ip() cout << "Page size : " << page_sz_bits << endl; cout << "Burst length : " << burst_len << endl; cout << "Internal prefetch width : " << int_prefetch_w << endl; - cout << "Force cache config 
: " << g_ip->force_cache_config << endl; + cout << "Force cache config : " << g_ip->force_cache_config + << endl; if (g_ip->force_cache_config) { cout << "Ndwl : " << g_ip->ndwl << endl; cout << "Ndbl : " << g_ip->ndbl << endl; @@ -795,239 +739,218 @@ InputParameter::display_ip() cout << "Ndsam1 : " << g_ip->ndsam1 << endl; cout << "Ndsam2 : " << g_ip->ndsam2 << endl; } - // cout << "Placing subarray out driver vertical? : " << g_ip->cl_vertical << endl; + // cout << "Placing subarray out driver vertical? : " << g_ip->cl_vertical + // << endl; } - - -powerComponents operator+(const powerComponents & x, const powerComponents & y) -{ +powerComponents operator+(const powerComponents &x, const powerComponents &y) { powerComponents z; z.dynamic = x.dynamic + y.dynamic; z.leakage = x.leakage + y.leakage; - z.gate_leakage = x.gate_leakage + y.gate_leakage; + z.gate_leakage = x.gate_leakage + y.gate_leakage; z.short_circuit = x.short_circuit + y.short_circuit; - z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; + z.longer_channel_leakage = + x.longer_channel_leakage + y.longer_channel_leakage; z.power_gated_leakage = x.power_gated_leakage + y.power_gated_leakage; - z.power_gated_with_long_channel_leakage = x.power_gated_with_long_channel_leakage + y.power_gated_with_long_channel_leakage; + z.power_gated_with_long_channel_leakage = + x.power_gated_with_long_channel_leakage + + y.power_gated_with_long_channel_leakage; return z; } -powerComponents operator*(const powerComponents & x, double const * const y) -{ +powerComponents operator*(const powerComponents &x, double const *const y) { powerComponents z; - z.dynamic = x.dynamic*y[0]; - z.leakage = x.leakage*y[1]; - z.gate_leakage = x.gate_leakage*y[2]; - z.short_circuit = x.short_circuit*y[3]; - z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage - z.power_gated_leakage = x.power_gated_leakage*y[1];//power_gated_leakage has 
the same behavior as normal leakage - z.power_gated_with_long_channel_leakage = x.power_gated_with_long_channel_leakage*y[1];//power_gated_with_long_channel_leakage has the same behavior as normal leakage + z.dynamic = x.dynamic * y[0]; + z.leakage = x.leakage * y[1]; + z.gate_leakage = x.gate_leakage * y[2]; + z.short_circuit = x.short_circuit * y[3]; + z.longer_channel_leakage = + x.longer_channel_leakage * + y[1]; // longer channel leakage has the same behavior as normal leakage + z.power_gated_leakage = + x.power_gated_leakage * + y[1]; // power_gated_leakage has the same behavior as normal leakage + z.power_gated_with_long_channel_leakage = + x.power_gated_with_long_channel_leakage * + y[1]; // power_gated_with_long_channel_leakage has the same behavior as + // normal leakage return z; } - -powerDef operator+(const powerDef & x, const powerDef & y) -{ +powerDef operator+(const powerDef &x, const powerDef &y) { powerDef z; - z.readOp = x.readOp + y.readOp; - z.writeOp = x.writeOp + y.writeOp; + z.readOp = x.readOp + y.readOp; + z.writeOp = x.writeOp + y.writeOp; z.searchOp = x.searchOp + y.searchOp; return z; } -powerDef operator*(const powerDef & x, double const * const y) -{ +powerDef operator*(const powerDef &x, double const *const y) { powerDef z; - z.readOp = x.readOp*y; - z.writeOp = x.writeOp*y; - z.searchOp = x.searchOp*y; + z.readOp = x.readOp * y; + z.writeOp = x.writeOp * y; + z.searchOp = x.searchOp * y; return z; } -uca_org_t cacti_interface(const string & infile_name) -{ +uca_org_t cacti_interface(const string &infile_name) { uca_org_t fin_res; - //uca_org_t result; + // uca_org_t result; fin_res.valid = false; g_ip = new InputParameter(); g_ip->parse_cfg(infile_name); - if(!g_ip->error_checking()) - exit(0); + if (!g_ip->error_checking()) + exit(0); if (g_ip->print_input_args) g_ip->display_ip(); - init_tech_params(g_ip->F_sz_um, false);//this init is for initializing wires + init_tech_params(g_ip->F_sz_um, false); // this init is for 
initializing wires Wire winit; // Do not delete this line. It initializes wires. -// g_tp.peri_global.display(); -// g_tp.sram_cell.display(); - - -// For HighRadix Only -// //// Wire wirea(g_ip->wt, 1000); -// //// wirea.print_wire(); -// //// cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl; -// // winit.print_wire(); -// // -// HighRadix *hr; -// hr = new HighRadix(); -// hr->compute_power(); -// hr->print_router(); -// exit(0); -// -// double sub_switch_sz = 2; -// double rows = 32; -// for (int i=0; i<6; i++) { -// sub_switch_sz = pow(2, i); -// rows = 64/sub_switch_sz; -// hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7); -// hr->compute_power(); -// hr->print_router(); -// delete hr; -// } -// // HighRadix yarc; -// // yarc.compute_power(); -// // yarc.print_router(); -// winit.print_wire(); -// exit(0); -// For HighRadix Only End - - if (g_ip->nuca == 1) - { + // g_tp.peri_global.display(); + // g_tp.sram_cell.display(); + + // For HighRadix Only + // //// Wire wirea(g_ip->wt, 1000); + // //// wirea.print_wire(); + // //// cout << "Wire Area " << wirea.area.get_area() << " sq. 
u" << endl; + // // winit.print_wire(); + // // + // HighRadix *hr; + // hr = new HighRadix(); + // hr->compute_power(); + // hr->print_router(); + // exit(0); + // + // double sub_switch_sz = 2; + // double rows = 32; + // for (int i=0; i<6; i++) { + // sub_switch_sz = pow(2, i); + // rows = 64/sub_switch_sz; + // hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7); + // hr->compute_power(); + // hr->print_router(); + // delete hr; + // } + // // HighRadix yarc; + // // yarc.compute_power(); + // // yarc.print_router(); + // winit.print_wire(); + // exit(0); + // For HighRadix Only End + + if (g_ip->nuca == 1) { Nuca n(&g_tp.peri_global); n.sim_nuca(); } - - //g_ip->display_ip(); + + // g_ip->display_ip(); solve(&fin_res); -// output_UCA(&fin_res); -// Wire::print_wire(); + // output_UCA(&fin_res); + // Wire::print_wire(); output_data_csv(fin_res); - if (!g_ip->dvs_voltage.empty()) - { - update_dvs(&fin_res); + if (!g_ip->dvs_voltage.empty()) { + update_dvs(&fin_res); } - if (g_ip->power_gating) - { - update_pg(&fin_res);//this is needed for compute area overhead of power-gating, even the gated power is calculated together un-gated leakage + if (g_ip->power_gating) { + update_pg(&fin_res); // this is needed for compute area overhead of + // power-gating, even the gated power is calculated + // together un-gated leakage } output_UCA(&fin_res); - Wire wprint;//reset wires to original configuration as in *.cfg file (dvs level 0) + Wire wprint; // reset wires to original configuration as in *.cfg file (dvs + // level 0) Wire::print_wire(); delete (g_ip); return fin_res; } -//cacti6.5's plain interface, please keep !!! +// cacti6.5's plain interface, please keep !!! 
uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports, - int excl_write_ports, - int single_ended_read_ports, + int cache_size, int line_size, int associativity, int rw_ports, + int excl_read_ports, int excl_write_ports, int single_ended_read_ports, int banks, double tech_node, // in nm - int page_sz, - int burst_length, - int pre_width, - int output_width, - int specific_tag, - int tag_width, - int access_mode, //0 normal, 1 seq, 2 fast - int cache, //scratch ram or cache - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, - int obj_func_leakage_power, - int obj_func_area, - int obj_func_cycle_time, - int dev_func_delay, - int dev_func_dynamic_power, - int dev_func_leakage_power, - int dev_func_area, - int dev_func_cycle_time, + int page_sz, int burst_length, int pre_width, int output_width, + int specific_tag, int tag_width, + int access_mode, // 0 normal, 1 seq, 2 fast + int cache, // scratch ram or cache + int main_mem, int obj_func_delay, int obj_func_dynamic_power, + int obj_func_leakage_power, int obj_func_area, int obj_func_cycle_time, + int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power, + int dev_func_area, int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay + // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, // 0-4 int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, int interconnect_projection_type_in, // 0 - aggressive, 1 - normal - int wire_inside_mat_type_in, - int wire_outside_mat_type_in, + int 
wire_inside_mat_type_in, int wire_outside_mat_type_in, int is_nuca, // 0 - UCA, 1 - NUCA int core_count, int cache_level, // 0 - L2, 1 - L3 - int nuca_bank_count, - int nuca_obj_func_delay, - int nuca_obj_func_dynamic_power, - int nuca_obj_func_leakage_power, - int nuca_obj_func_area, - int nuca_obj_func_cycle_time, - int nuca_dev_func_delay, - int nuca_dev_func_dynamic_power, - int nuca_dev_func_leakage_power, - int nuca_dev_func_area, + int nuca_bank_count, int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, int nuca_obj_func_leakage_power, + int nuca_obj_func_area, int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, int nuca_dev_func_area, int nuca_dev_func_cycle_time, - int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported - int p_input) -{ + int REPEATERS_IN_HTREE_SEGMENTS_in, // TODO for now only wires with + // repeaters are supported + int p_input) { g_ip = new InputParameter(); g_ip->add_ecc_b_ = true; - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->ic_proj_type = interconnect_projection_type_in; g_ip->wire_is_mat_type = wire_inside_mat_type_in; g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = burst_length; - g_ip->int_prefetch_w = pre_width; - g_ip->page_sz_bits = page_sz; - - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w 
= output_width; - g_ip->specific_tag = specific_tag; + g_ip->burst_len = burst_length; + g_ip->int_prefetch_w = pre_width; + g_ip->page_sz_bits = page_sz; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; if (tag_width == 0) { g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; + } else { + g_ip->tag_w = tag_width; } - g_ip->access_mode = access_mode; + g_ip->access_mode = access_mode; g_ip->delay_wt = obj_func_delay; g_ip->dynamic_power_wt = obj_func_dynamic_power; g_ip->leakage_power_wt = obj_func_leakage_power; g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->cycle_time_wt = obj_func_cycle_time; g_ip->delay_dev = dev_func_delay; g_ip->dynamic_power_dev = dev_func_dynamic_power; g_ip->leakage_power_dev = dev_func_leakage_power; g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->cycle_time_dev = dev_func_cycle_time; g_ip->ed = ed_ed2_none; - switch(wt) { + switch (wt) { case (0): g_ip->force_wiretype = 0; g_ip->wt = Global; @@ -1065,15 +988,15 @@ uca_org_t cacti_interface( g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; g_ip->area_wt_nuca = nuca_obj_func_area; - g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; + g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; g_ip->delay_dev_nuca = dev_func_delay; g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; g_ip->area_dev_nuca = nuca_dev_func_area; - g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; + g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; g_ip->nuca = is_nuca; g_ip->nuca_bank_count = nuca_bank_count; - if(nuca_bank_count > 0) { + if (nuca_bank_count > 0) { g_ip->force_nuca_bank = 1; } g_ip->cores = core_count; @@ -1081,15 +1004,15 @@ uca_org_t 
cacti_interface( g_ip->temp = temp; - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache != 0) ? true : false; + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache != 0) ? true : false; g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; g_ip->num_se_rd_ports = single_ended_read_ports; g_ip->print_detail = 1; g_ip->nuca = 0; @@ -1099,18 +1022,17 @@ uca_org_t cacti_interface( g_ip->force_wiretype = false; g_ip->print_input_args = p_input; - uca_org_t fin_res; fin_res.valid = false; - if (g_ip->error_checking() == false) exit(0); + if (g_ip->error_checking() == false) + exit(0); if (g_ip->print_input_args) g_ip->display_ip(); init_tech_params(g_ip->F_sz_um, false); Wire winit; // Do not delete this line. It initializes wires. - if (g_ip->nuca == 1) - { + if (g_ip->nuca == 1) { Nuca n(&g_tp.peri_global); n.sim_nuca(); } @@ -1122,176 +1044,146 @@ uca_org_t cacti_interface( return fin_res; } -//McPAT's plain interface, please keep !!! +// McPAT's plain interface, please keep !!! 
uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports,// para5 - int excl_write_ports, - int single_ended_read_ports, - int search_ports, + int cache_size, int line_size, int associativity, int rw_ports, + int excl_read_ports, // para5 + int excl_write_ports, int single_ended_read_ports, int search_ports, int banks, - double tech_node,//para10 - int output_width, - int specific_tag, - int tag_width, - int access_mode, - int cache, //para15 - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, + double tech_node, // para10 + int output_width, int specific_tag, int tag_width, int access_mode, + int cache, // para15 + int main_mem, int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power, - int obj_func_cycle_time, //para20 - int obj_func_area, - int dev_func_delay, - int dev_func_dynamic_power, + int obj_func_cycle_time, // para20 + int obj_func_area, int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power, - int dev_func_area, //para25 + int dev_func_area, // para25 int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing - int data_arr_ram_cell_tech_flavor_in,//para30 + int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay + // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, // para30 int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, int interconnect_projection_type_in, - int wire_inside_mat_type_in,//para35 - int wire_outside_mat_type_in, - int REPEATERS_IN_HTREE_SEGMENTS_in, + int wire_inside_mat_type_in, // para35 + int wire_outside_mat_type_in, int 
REPEATERS_IN_HTREE_SEGMENTS_in, int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, - int PAGE_SIZE_BITS_in,//para40 - int BURST_LENGTH_in, - int INTERNAL_PREFETCH_WIDTH_in, - int force_wiretype, + int PAGE_SIZE_BITS_in, // para40 + int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in, int force_wiretype, int wiretype, - int force_config,//para45 - int ndwl, - int ndbl, - int nspd, - int ndcm, - int ndsam1,//para50 - int ndsam2, - int ecc) -{ + int force_config, // para45 + int ndwl, int ndbl, int nspd, int ndcm, + int ndsam1, // para50 + int ndsam2, int ecc) { g_ip = new InputParameter(); uca_org_t fin_res; fin_res.valid = false; - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->ic_proj_type = interconnect_projection_type_in; g_ip->wire_is_mat_type = wire_inside_mat_type_in; g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = BURST_LENGTH_in; - g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; - g_ip->page_sz_bits = PAGE_SIZE_BITS_in; - - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w = output_width; - g_ip->specific_tag = specific_tag; + g_ip->burst_len = BURST_LENGTH_in; + g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = 
output_width; + g_ip->specific_tag = specific_tag; if (specific_tag == 0) { g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; + } else { + g_ip->tag_w = tag_width; } - g_ip->access_mode = access_mode; + g_ip->access_mode = access_mode; g_ip->delay_wt = obj_func_delay; g_ip->dynamic_power_wt = obj_func_dynamic_power; g_ip->leakage_power_wt = obj_func_leakage_power; g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->cycle_time_wt = obj_func_cycle_time; g_ip->delay_dev = dev_func_delay; g_ip->dynamic_power_dev = dev_func_dynamic_power; g_ip->leakage_power_dev = dev_func_leakage_power; g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->cycle_time_dev = dev_func_cycle_time; g_ip->temp = temp; g_ip->ed = ed_ed2_none; - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache ==1) ? true : false; - g_ip->pure_ram = (cache ==0) ? true : false; - g_ip->pure_cam = (cache ==2) ? true : false; + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache == 1) ? true : false; + g_ip->pure_ram = (cache == 0) ? true : false; + g_ip->pure_cam = (cache == 2) ? true : false; g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; - g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + g_ip->broadcast_addr_din_over_ver_htrees = + BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; g_ip->num_se_rd_ports = single_ended_read_ports; g_ip->num_search_ports = search_ports; g_ip->print_detail = 1; g_ip->nuca = 0; - if (force_wiretype == 0) - { - g_ip->wt = Global; - g_ip->force_wiretype = false; - } - else - { g_ip->force_wiretype = true; - if (wiretype==10) { - g_ip->wt = Global_10; - } - if (wiretype==20) { - g_ip->wt = Global_20; - } - if (wiretype==30) { - g_ip->wt = Global_30; - } - if (wiretype==5) { - g_ip->wt = Global_5; - } - if (wiretype==0) { - g_ip->wt = Low_swing; - } - } - //g_ip->wt = Global_5; - if (force_config == 0) - { - g_ip->force_cache_config = false; + if (force_wiretype == 0) { + g_ip->wt = Global; + g_ip->force_wiretype = false; + } else { + g_ip->force_wiretype = true; + if (wiretype == 10) { + g_ip->wt = Global_10; } - else - { - g_ip->force_cache_config = true; - g_ip->ndbl=ndbl; - g_ip->ndwl=ndwl; - g_ip->nspd=nspd; - g_ip->ndcm=ndcm; - g_ip->ndsam1=ndsam1; - g_ip->ndsam2=ndsam2; - - + if (wiretype == 20) { + g_ip->wt = Global_20; + } + if (wiretype == 30) { + g_ip->wt = Global_30; + } + if (wiretype == 5) { + g_ip->wt = Global_5; + } + if (wiretype == 0) { + g_ip->wt = Low_swing; } - - if (ecc==0){ - g_ip->add_ecc_b_=false; } - else - { - g_ip->add_ecc_b_=true; + // g_ip->wt = Global_5; + if (force_config == 0) { + g_ip->force_cache_config = false; + } else { + g_ip->force_cache_config = true; + g_ip->ndbl = ndbl; + g_ip->ndwl = ndwl; + g_ip->nspd = nspd; + g_ip->ndcm = ndcm; + g_ip->ndsam1 = ndsam1; + g_ip->ndsam2 = ndsam2; 
} + if (ecc == 0) { + g_ip->add_ecc_b_ = false; + } else { + g_ip->add_ecc_b_ = true; + } - if(!g_ip->error_checking()) - exit(0); + if (!g_ip->error_checking()) + exit(0); init_tech_params(g_ip->F_sz_um, false); Wire winit; // Do not delete this line. It initializes wires. @@ -1305,213 +1197,197 @@ uca_org_t cacti_interface( return fin_res; } - - -bool InputParameter::error_checking() -{ - int A; - bool seq_access = false; +bool InputParameter::error_checking() { + int A; + bool seq_access = false; fast_access = true; fully_assoc = false; - switch (access_mode) - { + switch (access_mode) { case 0: - seq_access = false; + seq_access = false; fast_access = false; break; case 1: - seq_access = true; + seq_access = true; fast_access = false; break; case 2: - seq_access = false; + seq_access = false; fast_access = true; break; } - if(is_main_mem) - { - if(ic_proj_type == 0) - { - cerr << "DRAM model supports only conservative interconnect projection!\n\n"; + if (is_main_mem) { + if (ic_proj_type == 0) { + cerr << "DRAM model supports only conservative interconnect " + "projection!\n\n"; return false; } } uint32_t B = line_sz; - if (B < 1) - { + if (B < 1) { - cerr << "Block size must >= 1" << endl; + cerr << "Block size must >= 1" << endl; return false; - } - else if (B*8 < out_w) - { - cerr << "Block size must be at least " << out_w/8 << endl; + } else if (B * 8 < out_w) { + cerr << "Block size must be at least " << out_w / 8 << endl; return false; } - if (F_sz_um <= 0) - { + if (F_sz_um <= 0) { cerr << "Feature size must be > 0" << endl; return false; - } - else if (F_sz_um > 0.181) - { + } else if (F_sz_um > 0.181) { cerr << "Feature size must be <= 180 nm" << endl; return false; - }else if (F_sz_um >0.091 && (data_arr_ram_cell_tech_type!= itrs_hp - || tag_arr_ram_cell_tech_type!= itrs_hp - || data_arr_peri_global_tech_type != itrs_hp - ||tag_arr_peri_global_tech_type != itrs_hp)) - { - cerr << "Feature size from 90nm to 180 nm only support the ITRS HP device type" 
<< endl; - return false; + } else if (F_sz_um > 0.091 && (data_arr_ram_cell_tech_type != itrs_hp || + tag_arr_ram_cell_tech_type != itrs_hp || + data_arr_peri_global_tech_type != itrs_hp || + tag_arr_peri_global_tech_type != itrs_hp)) { + cerr << "Feature size from 90nm to 180 nm only support the ITRS HP device " + "type" + << endl; + return false; } - uint32_t RWP = num_rw_ports; - uint32_t ERP = num_rd_ports; - uint32_t EWP = num_wr_ports; + uint32_t RWP = num_rw_ports; + uint32_t ERP = num_rd_ports; + uint32_t EWP = num_wr_ports; uint32_t NSER = num_se_rd_ports; uint32_t SCHP = num_search_ports; -//TODO: revisit this. This is an important feature. Sheng thought this should be used -// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to -// // the number of banks, we assume that the multiple ports are implemented via the multiple banks. -// // In such a case we assume that each bank has 1 RWP port. -// if ((RWP + ERP + EWP) <= nbanks && nbanks>1) -// { -// RWP = 1; -// ERP = 0; -// EWP = 0; -// NSER = 0; -// } -// else if ((RWP < 0) || (EWP < 0) || (ERP < 0)) -// { -// cerr << "Ports must >=0" << endl; -// return false; -// } -// else if (RWP > 2) -// { -// cerr << "Maximum of 2 read/write ports" << endl; -// return false; -// } -// else if ((RWP+ERP+EWP) < 1) + // TODO: revisit this. This is an important feature. Sheng thought this should + // be used + // // If multiple banks and multiple ports are specified, then if number of + // ports is less than or equal to + // // the number of banks, we assume that the multiple ports are implemented + // via the multiple banks. + // // In such a case we assume that each bank has 1 RWP port. 
+ // if ((RWP + ERP + EWP) <= nbanks && nbanks>1) + // { + // RWP = 1; + // ERP = 0; + // EWP = 0; + // NSER = 0; + // } + // else if ((RWP < 0) || (EWP < 0) || (ERP < 0)) + // { + // cerr << "Ports must >=0" << endl; + // return false; + // } + // else if (RWP > 2) + // { + // cerr << "Maximum of 2 read/write ports" << endl; + // return false; + // } + // else if ((RWP+ERP+EWP) < 1) // Changed to new implementation: // The number of ports specified at input is per bank - if ((RWP+ERP+EWP) < 1) - { + if ((RWP + ERP + EWP) < 1) { cerr << "Must have at least one port" << endl; return false; } - if (is_pow2(nbanks) == false) - { - cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl; + if (is_pow2(nbanks) == false) { + cerr << "Number of subbanks should be greater than or equal to 1 and " + "should be a power of 2" + << endl; return false; } - int C = cache_sz/nbanks; - if (C < 64) - { + int C = cache_sz / nbanks; + if (C < 64) { cerr << "Cache size must >=64" << endl; return false; } -//TODO: revisit this -// if (pure_ram==true && assoc!=1) -// { -// cerr << "Pure RAM must have assoc as 1" << endl; -// return false; -// } - - //fully assoc and cam check - if (is_cache && assoc==0) - fully_assoc =true; - else - fully_assoc = false; - - if (pure_cam==true && assoc!=0) - { - cerr << "Pure CAM must have associativity as 0" << endl; - return false; - } + // TODO: revisit this + // if (pure_ram==true && assoc!=1) + // { + // cerr << "Pure RAM must have assoc as 1" << endl; + // return false; + // } - if (assoc==0 && (pure_cam==false && is_cache ==false)) - { - cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl; - return false; - } + // fully assoc and cam check + if (is_cache && assoc == 0) + fully_assoc = true; + else + fully_assoc = false; - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type - || data_arr_peri_global_tech_type != 
tag_arr_peri_global_tech_type )) - { - cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl; - return false; - } + if (pure_cam == true && assoc != 0) { + cerr << "Pure CAM must have associativity as 0" << endl; + return false; + } - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram)) - { - cerr << "DRAM based CAM and fully associative cache are not supported" << endl; - return false; - } + if (assoc == 0 && (pure_cam == false && is_cache == false)) { + cerr << "Only CAM or Fully associative cache can have associativity as 0" + << endl; + return false; + } - if ((fully_assoc==true || pure_cam==true) - && (is_main_mem==true)) - { - cerr << "CAM and fully associative cache cannot be as main memory" << endl; - return false; - } + if ((fully_assoc == true || pure_cam == true) && + (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type || + data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) { + cerr << "CAM and fully associative cache must have same device type for " + "both data and tag array" + << endl; + return false; + } - if ((fully_assoc || pure_cam) && SCHP<1) - { - cerr << "CAM and fully associative must have at least 1 search port" << endl; - return false; - } + if ((fully_assoc == true || pure_cam == true) && + (data_arr_ram_cell_tech_type == lp_dram || + data_arr_ram_cell_tech_type == comm_dram)) { + cerr << "DRAM based CAM and fully associative cache are not supported" + << endl; + return false; + } - if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam))) - { - ERP=SCHP; - } + if ((fully_assoc == true || pure_cam == true) && (is_main_mem == true)) { + cerr << "CAM and fully associative cache cannot be as main memory" << endl; + return false; + } -// if ((!(fully_assoc || pure_cam)) && SCHP>=1) -// { -// cerr << "None CAM and fully associative cannot have search ports" << endl; -// return false; -// } + 
if ((fully_assoc || pure_cam) && SCHP < 1) { + cerr << "CAM and fully associative must have at least 1 search port" + << endl; + return false; + } - if (assoc == 0) - { - A = C/B; - //fully_assoc = true; + if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) { + ERP = SCHP; } - else - { - if (assoc == 1) - { + + // if ((!(fully_assoc || pure_cam)) && SCHP>=1) + // { + // cerr << "None CAM and fully associative cannot have search ports" << + // endl; return false; + // } + + if (assoc == 0) { + A = C / B; + // fully_assoc = true; + } else { + if (assoc == 1) { A = 1; - //fully_assoc = false; - } - else - { - //fully_assoc = false; + // fully_assoc = false; + } else { + // fully_assoc = false; A = assoc; - if (is_pow2(A) == false) - { + if (is_pow2(A) == false) { cerr << "Associativity must be a power of 2" << endl; return false; } } } - if (C/(B*A) <= 1 && assoc!=0) - { + if (C / (B * A) <= 1 && assoc != 0) { cerr << "Number of sets is too small: " << endl; - cerr << " Need to either increase cache size, or decrease associativity or block size" << endl; + cerr << " Need to either increase cache size, or decrease associativity or " + "block size" + << endl; cerr << " (or use fully associative cache)" << endl; return false; } @@ -1519,157 +1395,141 @@ bool InputParameter::error_checking() block_sz = B; /*dt: testing sequential access mode*/ - if(seq_access) - { - tag_assoc = A; + if (seq_access) { + tag_assoc = A; data_assoc = 1; is_seq_acc = true; - } - else - { - tag_assoc = A; + } else { + tag_assoc = A; data_assoc = A; is_seq_acc = false; } - if (assoc==0) - { + if (assoc == 0) { data_assoc = 1; } - num_rw_ports = RWP; - num_rd_ports = ERP; - num_wr_ports = EWP; - num_se_rd_ports = NSER; + num_rw_ports = RWP; + num_rd_ports = ERP; + num_wr_ports = EWP; + num_se_rd_ports = NSER; if (!(fully_assoc || pure_cam)) num_search_ports = 0; - nsets = C/(B*A); + nsets = C / (B * A); - if (temp < 300 || temp > 400 || temp%10 != 0) - { - cerr << temp << " 
Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl; + if (temp < 300 || temp > 400 || temp % 10 != 0) { + cerr + << temp + << " Temperature must be between 300 and 400 Kelvin and multiple of 10." + << endl; return false; } - if (nsets < 1) - { + if (nsets < 1) { cerr << "Less than one set..." << endl; return false; } -// power_gating = (array_power_gated -// || bitline_floating -// || wl_power_gated -// || cl_power_gated -// || interconect_power_gated)?true:false; - - if (power_gating) - { - array_power_gated = true; - bitline_floating = true; - wl_power_gated = true; - cl_power_gated = true; - interconect_power_gated = true; - } - else - { - array_power_gated = false; - bitline_floating = false; - wl_power_gated = false; - cl_power_gated = false; - interconect_power_gated = false; + // power_gating = (array_power_gated + // || bitline_floating + // || wl_power_gated + // || cl_power_gated + // || interconect_power_gated)?true:false; + + if (power_gating) { + array_power_gated = true; + bitline_floating = true; + wl_power_gated = true; + cl_power_gated = true; + interconect_power_gated = true; + } else { + array_power_gated = false; + bitline_floating = false; + wl_power_gated = false; + cl_power_gated = false; + interconect_power_gated = false; } -// if (power_gating && (!dvs_voltage.empty())) -// { -// cerr << "Power gating and DVS cannot be active simultaneously, please model them in two runs.\n\n"; -// return false; -// } - - if (power_gating && (pure_cam||fully_assoc)) - { - cerr << "Power gating in CAM is not supported yet.\n\n"<< endl; - return false; - } + // if (power_gating && (!dvs_voltage.empty())) + // { + // cerr << "Power gating and DVS cannot be active simultaneously, please + // model them in two runs.\n\n"; return false; + // } - if (power_gating && (is_main_mem - ||data_arr_ram_cell_tech_type== lp_dram - ||data_arr_ram_cell_tech_type== comm_dram - ||tag_arr_ram_cell_tech_type== lp_dram - ||tag_arr_ram_cell_tech_type== 
comm_dram - ||data_arr_peri_global_tech_type== lp_dram - ||data_arr_peri_global_tech_type== comm_dram - ||tag_arr_peri_global_tech_type== lp_dram - || tag_arr_peri_global_tech_type== comm_dram)) - { - cerr << "Power gating in DRAM is not supported. \n\n"<< endl; - return false; - } + if (power_gating && (pure_cam || fully_assoc)) { + cerr << "Power gating in CAM is not supported yet.\n\n" << endl; + return false; + } - if (long_channel_device && (is_main_mem - ||data_arr_ram_cell_tech_type== lp_dram - ||data_arr_ram_cell_tech_type== comm_dram - ||tag_arr_ram_cell_tech_type== lp_dram - ||tag_arr_ram_cell_tech_type== comm_dram - ||data_arr_peri_global_tech_type== lp_dram - ||data_arr_peri_global_tech_type== comm_dram - ||tag_arr_peri_global_tech_type== lp_dram - || tag_arr_peri_global_tech_type== comm_dram)) - { - cerr << "Long Channel Device in DRAM is not supported. \n\n"<< endl; - return false; - } + if (power_gating && (is_main_mem || data_arr_ram_cell_tech_type == lp_dram || + data_arr_ram_cell_tech_type == comm_dram || + tag_arr_ram_cell_tech_type == lp_dram || + tag_arr_ram_cell_tech_type == comm_dram || + data_arr_peri_global_tech_type == lp_dram || + data_arr_peri_global_tech_type == comm_dram || + tag_arr_peri_global_tech_type == lp_dram || + tag_arr_peri_global_tech_type == comm_dram)) { + cerr << "Power gating in DRAM is not supported. \n\n" << endl; + return false; + } - if ((!dvs_voltage.empty()) && (is_main_mem - ||data_arr_ram_cell_tech_type== lp_dram - ||data_arr_ram_cell_tech_type== comm_dram - ||tag_arr_ram_cell_tech_type== lp_dram - ||tag_arr_ram_cell_tech_type== comm_dram - ||data_arr_peri_global_tech_type== lp_dram - ||data_arr_peri_global_tech_type== comm_dram - ||tag_arr_peri_global_tech_type== lp_dram - || tag_arr_peri_global_tech_type== comm_dram)) - { - cerr << "DVS in DRAM is not supported. 
\n\n"<< endl; - return false; - } + if (long_channel_device && + (is_main_mem || data_arr_ram_cell_tech_type == lp_dram || + data_arr_ram_cell_tech_type == comm_dram || + tag_arr_ram_cell_tech_type == lp_dram || + tag_arr_ram_cell_tech_type == comm_dram || + data_arr_peri_global_tech_type == lp_dram || + data_arr_peri_global_tech_type == comm_dram || + tag_arr_peri_global_tech_type == lp_dram || + tag_arr_peri_global_tech_type == comm_dram)) { + cerr << "Long Channel Device in DRAM is not supported. \n\n" << endl; + return false; + } -// if (power_gating && (specific_hp_vdd -// || specific_lstp_vdd -// || specific_lop_vdd)) -// { -// cerr << "Default Vdd is recommended when enabling power gating.\n\n"<< endl; -// return false; -// } + if ((!dvs_voltage.empty()) && + (is_main_mem || data_arr_ram_cell_tech_type == lp_dram || + data_arr_ram_cell_tech_type == comm_dram || + tag_arr_ram_cell_tech_type == lp_dram || + tag_arr_ram_cell_tech_type == comm_dram || + data_arr_peri_global_tech_type == lp_dram || + data_arr_peri_global_tech_type == comm_dram || + tag_arr_peri_global_tech_type == lp_dram || + tag_arr_peri_global_tech_type == comm_dram)) { + cerr << "DVS in DRAM is not supported. \n\n" << endl; + return false; + } - if ((!dvs_voltage.empty())&& ((data_arr_ram_cell_tech_type !=data_arr_peri_global_tech_type) - ||(tag_arr_peri_global_tech_type !=tag_arr_ram_cell_tech_type) - ||(data_arr_ram_cell_tech_type !=tag_arr_ram_cell_tech_type))) - { - cerr << "Same device types is recommended for tag/data/cell/peripheral for DVS. 
Same DVS voltage will be applied to different device types\n\n"; + // if (power_gating && (specific_hp_vdd + // || specific_lstp_vdd + // || specific_lop_vdd)) + // { + // cerr << "Default Vdd is recommended when enabling power gating.\n\n"<< + // endl; return false; + // } + + if ((!dvs_voltage.empty()) && + ((data_arr_ram_cell_tech_type != data_arr_peri_global_tech_type) || + (tag_arr_peri_global_tech_type != tag_arr_ram_cell_tech_type) || + (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type))) { + cerr << "Same device types is recommended for tag/data/cell/peripheral for " + "DVS. Same DVS voltage will be applied to different device " + "types\n\n"; return false; } return true; } - - -void output_data_csv(const uca_org_t & fin_res) -{ - //TODO: the csv output should remain +void output_data_csv(const uca_org_t &fin_res) { + // TODO: the csv output should remain fstream file("out.csv", ios::in); - bool print_index = file.fail(); + bool print_index = file.fail(); file.close(); - file.open("out.csv", ios::out|ios::app); - if (file.fail() == true) - { + file.open("out.csv", ios::out | ios::app); + if (file.fail() == true) { cerr << "File out.csv could not be opened successfully" << endl; - } - else - { - if (print_index == true) - { + } else { + if (print_index == true) { file << "Tech node (nm), "; file << "Capacity (bytes), "; file << "Number of banks, "; @@ -1677,25 +1537,25 @@ void output_data_csv(const uca_org_t & fin_res) file << "Output width (bits), "; file << "Access time (ns), "; file << "Random cycle time (ns), "; -// file << "Multisubbank interleave cycle time (ns), "; - -// file << "Delay request network (ns), "; -// file << "Delay inside mat (ns), "; -// file << "Delay reply network (ns), "; -// file << "Tag array access time (ns), "; -// file << "Data array access time (ns), "; -// file << "Refresh period (microsec), "; -// file << "DRAM array availability (%), "; + // file << "Multisubbank interleave cycle time (ns), "; + + // file << "Delay 
request network (ns), "; + // file << "Delay inside mat (ns), "; + // file << "Delay reply network (ns), "; + // file << "Tag array access time (ns), "; + // file << "Data array access time (ns), "; + // file << "Refresh period (microsec), "; + // file << "DRAM array availability (%), "; file << "Dynamic search energy (nJ), "; file << "Dynamic read energy (nJ), "; file << "Dynamic write energy (nJ), "; -// file << "Tag Dynamic read energy (nJ), "; -// file << "Data Dynamic read energy (nJ), "; -// file << "Dynamic read power (mW), "; + // file << "Tag Dynamic read energy (nJ), "; + // file << "Data Dynamic read energy (nJ), "; + // file << "Dynamic read power (mW), "; file << "Standby leakage per bank(mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Refresh power as percentage of standby leakage, "; + // file << "Leakage per bank with leak power management (mW), "; + // file << "Leakage per bank with leak power management (mW), "; + // file << "Refresh power as percentage of standby leakage, "; file << "Area (mm2), "; file << "Ndwl, "; file << "Ndbl, "; @@ -1712,33 +1572,33 @@ void output_data_csv(const uca_org_t & fin_res) file << "Ntsam_level_2, "; file << "Tag arrary area efficiency %, "; -// file << "Resistance per unit micron (ohm-micron), "; -// file << "Capacitance per unit micron (fF per micron), "; -// file << "Unit-length wire delay (ps), "; -// file << "FO4 delay (ps), "; -// file << "delay route to bank (including crossb delay) (ps), "; -// file << "Crossbar delay (ps), "; -// file << "Dyn read energy per access from closed page (nJ), "; -// file << "Dyn read energy per access from open page (nJ), "; -// file << "Leak power of an subbank with page closed (mW), "; -// file << "Leak power of a subbank with page open (mW), "; -// file << "Leak power of request and reply networks (mW), "; -// file << "Number of subbanks, "; -// file << "Page size in bits, 
"; -// file << "Activate power, "; -// file << "Read power, "; -// file << "Write power, "; -// file << "Precharge power, "; -// file << "tRCD, "; -// file << "CAS latency, "; -// file << "Precharge delay, "; -// file << "Perc dyn energy bitlines, "; -// file << "perc dyn energy wordlines, "; -// file << "perc dyn energy outside mat, "; -// file << "Area opt (perc), "; -// file << "Delay opt (perc), "; -// file << "Repeater opt (perc), "; -// file << "Aspect ratio"; + // file << "Resistance per unit micron (ohm-micron), "; + // file << "Capacitance per unit micron (fF per micron), "; + // file << "Unit-length wire delay (ps), "; + // file << "FO4 delay (ps), "; + // file << "delay route to bank (including crossb delay) (ps), "; + // file << "Crossbar delay (ps), "; + // file << "Dyn read energy per access from closed page (nJ), "; + // file << "Dyn read energy per access from open page (nJ), "; + // file << "Leak power of an subbank with page closed (mW), "; + // file << "Leak power of a subbank with page open (mW), "; + // file << "Leak power of request and reply networks (mW), "; + // file << "Number of subbanks, "; + // file << "Page size in bits, "; + // file << "Activate power, "; + // file << "Read power, "; + // file << "Write power, "; + // file << "Precharge power, "; + // file << "tRCD, "; + // file << "CAS latency, "; + // file << "Precharge delay, "; + // file << "Perc dyn energy bitlines, "; + // file << "perc dyn energy wordlines, "; + // file << "perc dyn energy outside mat, "; + // file << "Area opt (perc), "; + // file << "Delay opt (perc), "; + // file << "Repeater opt (perc), "; + // file << "Aspect ratio"; file << endl; } file << g_ip->F_sz_nm << ", "; @@ -1746,56 +1606,59 @@ void output_data_csv(const uca_org_t & fin_res) file << g_ip->nbanks << ", "; file << g_ip->tag_assoc << ", "; file << g_ip->out_w << ", "; - file << fin_res.access_time*1e+9 << ", "; - file << fin_res.cycle_time*1e+9 << ", "; -// file << 
fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; -// file << fin_res.data_array2->delay_request_network*1e+9 << ", "; -// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; -// file << fin_res.data_array2.delay_reply_network*1e+9 << ", "; - -// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) -// { -// file << fin_res.tag_array2->access_time*1e+9 << ", "; -// } -// else -// { -// file << 0 << ", "; -// } -// file << fin_res.data_array2->access_time*1e+9 << ", "; -// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; -// file << fin_res.data_array2->dram_array_availability << ", "; - if (g_ip->fully_assoc || g_ip->pure_cam) - { - file << fin_res.power.searchOp.dynamic*1e+9 << ", "; - } - else - { - file << "N/A" << ", "; - } - file << fin_res.power.readOp.dynamic*1e+9 << ", "; - file << fin_res.power.writeOp.dynamic*1e+9 << ", "; -// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) -// { -// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; -// } -// else -// { -// file << "NA" << ", "; -// } -// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", "; -// if (g_ip->fully_assoc || g_ip->pure_cam) -// { -// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", "; -// } -// else -// { -// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", "; -// } - - file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; -// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; -// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; - file << fin_res.area*1e-6 << ", "; + file << fin_res.access_time * 1e+9 << ", "; + file << fin_res.cycle_time * 1e+9 << ", "; + // file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 + // << ", "; file << fin_res.data_array2->delay_request_network*1e+9 << ", + // "; file << fin_res.data_array2->delay_inside_mat*1e+9 << ", 
"; file + // << fin_res.data_array2.delay_reply_network*1e+9 << ", "; + + // if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) + // { + // file << fin_res.tag_array2->access_time*1e+9 << ", "; + // } + // else + // { + // file << 0 << ", "; + // } + // file << fin_res.data_array2->access_time*1e+9 << ", "; + // file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; + // file << fin_res.data_array2->dram_array_availability << ", "; + if (g_ip->fully_assoc || g_ip->pure_cam) { + file << fin_res.power.searchOp.dynamic * 1e+9 << ", "; + } else { + file << "N/A" + << ", "; + } + file << fin_res.power.readOp.dynamic * 1e+9 << ", "; + file << fin_res.power.writeOp.dynamic * 1e+9 << ", "; + // if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) + // { + // file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; + // } + // else + // { + // file << "NA" << ", "; + // } + // file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", "; + // if (g_ip->fully_assoc || g_ip->pure_cam) + // { + // file << + // fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", "; + // } + // else + // { + // file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << + // ", "; + // } + + file << (fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage) * + 1000 + << ", "; + // file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", + // "; file << fin_res.data_array.refresh_power / + // fin_res.data_array.total_power.readOp.leakage << ", "; + file << fin_res.area * 1e-6 << ", "; file << fin_res.data_array2->Ndwl << ", "; file << fin_res.data_array2->Ndbl << ", "; @@ -1804,270 +1667,298 @@ void output_data_csv(const uca_org_t & fin_res) file << fin_res.data_array2->Ndsam_lev_1 << ", "; file << fin_res.data_array2->Ndsam_lev_2 << ", "; file << fin_res.data_array2->area_efficiency << ", "; - if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) - { - file << fin_res.tag_array2->Ndwl << ", "; - file << 
fin_res.tag_array2->Ndbl << ", "; - file << fin_res.tag_array2->Nspd << ", "; - file << fin_res.tag_array2->deg_bl_muxing << ", "; - file << fin_res.tag_array2->Ndsam_lev_1 << ", "; - file << fin_res.tag_array2->Ndsam_lev_2 << ", "; - file << fin_res.tag_array2->area_efficiency << ", "; - } - else - { - file << "N/A" << ", "; - file << "N/A"<< ", "; - file << "N/A" << ", "; - file << "N/A" << ", "; - file << "N/A" << ", "; - file << "N/A" << ", "; - file << "N/A" << ", "; - } - -// file << g_tp.wire_inside_mat.R_per_um << ", "; -// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; -// file << g_tp.unit_len_wire_del / 1e-12 << ", "; -// file << g_tp.FO4 / 1e-12 << ", "; -// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", "; -// file << fin_res.data_array.delay_crossbar / 1e-9 << ", "; -// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", "; -// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", "; -// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", "; -// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", "; -// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", "; -// file << fin_res.data_array.number_subbanks << ", " ; -// file << fin_res.data_array.page_size_in_bits << ", " ; -// file << fin_res.data_array.activate_energy * 1e9 << ", " ; -// file << fin_res.data_array.read_energy * 1e9 << ", " ; -// file << fin_res.data_array.write_energy * 1e9 << ", " ; -// file << fin_res.data_array.precharge_energy * 1e9 << ", " ; -// file << fin_res.data_array.trcd * 1e9 << ", " ; -// file << fin_res.data_array.cas_latency * 1e9 << ", " ; -// file << fin_res.data_array.precharge_delay * 1e9 << ", " ; -// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; - file<fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) { + file << fin_res.tag_array2->Ndwl << ", "; + file << fin_res.tag_array2->Ndbl << ", "; + file << 
fin_res.tag_array2->Nspd << ", "; + file << fin_res.tag_array2->deg_bl_muxing << ", "; + file << fin_res.tag_array2->Ndsam_lev_1 << ", "; + file << fin_res.tag_array2->Ndsam_lev_2 << ", "; + file << fin_res.tag_array2->area_efficiency << ", "; + } else { + file << "N/A" + << ", "; + file << "N/A" + << ", "; + file << "N/A" + << ", "; + file << "N/A" + << ", "; + file << "N/A" + << ", "; + file << "N/A" + << ", "; + file << "N/A" + << ", "; + } + + // file << g_tp.wire_inside_mat.R_per_um << ", "; + // file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; + // file << g_tp.unit_len_wire_del / 1e-12 << ", "; + // file << g_tp.FO4 / 1e-12 << ", "; + // file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", "; + // file << fin_res.data_array.delay_crossbar / 1e-9 << ", "; + // file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << + // ", "; file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 + // << ", "; file << fin_res.data_array.leak_power_subbank_closed_page / + // 1e-3 << ", "; file << fin_res.data_array.leak_power_subbank_open_page + // / 1e-3 << ", "; file << + // fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", + // "; file << fin_res.data_array.number_subbanks << ", " ; file << + // fin_res.data_array.page_size_in_bits << ", " ; file << + // fin_res.data_array.activate_energy * 1e9 << ", " ; file << + // fin_res.data_array.read_energy * 1e9 << ", " ; file << + // fin_res.data_array.write_energy * 1e9 << ", " ; file << + // fin_res.data_array.precharge_energy * 1e9 << ", " ; file << + // fin_res.data_array.trcd * 1e9 << ", " ; file << + // fin_res.data_array.cas_latency * 1e9 << ", " ; file << + // fin_res.data_array.precharge_delay * 1e9 << ", " ; file << + // fin_res.data_array.all_banks_height / + // fin_res.data_array.all_banks_width; + file << endl; } file.close(); } - - -void output_UCA(uca_org_t *fr) -{ - -double long_channel_leakage_reduction = 0.1 + 
0.9*(0.8*fr->data_array2->long_channel_leakage_reduction_memcell - + 0.2*fr->data_array2->long_channel_leakage_reduction_periperal);//TODO -double areaoverhead, overhead_data, overhead_tag; -double wakeup_E, wakeup_T, wakeup_E_data, wakeup_T_data, wakeup_E_tag, wakeup_T_tag; -int dvs_levels = g_ip->dvs_voltage.size(); -int i; -bool dvs = !g_ip->dvs_voltage.empty(); - // if (NUCA) +void output_UCA(uca_org_t *fr) { + + double long_channel_leakage_reduction = + 0.1 + + 0.9 * (0.8 * fr->data_array2->long_channel_leakage_reduction_memcell + + 0.2 * fr->data_array2 + ->long_channel_leakage_reduction_periperal); // TODO + double areaoverhead, overhead_data, overhead_tag; + double wakeup_E, wakeup_T, wakeup_E_data, wakeup_T_data, wakeup_E_tag, + wakeup_T_tag; + int dvs_levels = g_ip->dvs_voltage.size(); + int i; + bool dvs = !g_ip->dvs_voltage.empty(); + // if (NUCA) if (0) { cout << "\n\n Detailed Bank Stats:\n"; - cout << " Bank Size (bytes): %d\n" << - (int) (g_ip->cache_sz); - } - else { + cout << " Bank Size (bytes): %d\n" << (int)(g_ip->cache_sz); + } else { if (g_ip->data_arr_ram_cell_tech_type == 3) { - cout << "\n---------- CACTI-P, with new features: "<data_arr_ram_cell_tech_type == 4) { - cout << "\n---------- CACTI-P, with new features: "<data_arr_ram_cell_tech_type == 4) { + cout << "\n---------- CACTI-P, with new features: " << VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI << "), Uniform" + << "Cache Access Commodity DRAM Model ----------\n"; + } else { + cout << "\n---------- CACTI-P, with new features: " << VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI + << "), Uniform Cache Access " + "SRAM Model ----------\n"; + } + // (version "<< VER_MAJOR_CACTI <<"."<< VER_MINOR_CACTI<< + // VER_postfix_CACTI<<"; (version "<< VER_MAJOR_CACTI <<"."<< + // VER_MINOR_CACTI<<"."VER_COMMENT_CACTI << VER_postfix_CACTI + // << " of " + // << VER_UPDATE_CACTI << ") cout << "\nCache Parameters:\n"; - cout << " Total cache size (bytes): " << - (int) (g_ip->cache_sz) << 
endl; + cout << " Total cache size (bytes): " << (int)(g_ip->cache_sz) << endl; } - cout << " Number of banks: " << (int) g_ip->nbanks << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) + cout << " Number of banks: " << (int)g_ip->nbanks << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) cout << " Associativity: fully associative\n"; else { if (g_ip->tag_assoc == 1) cout << " Associativity: direct mapped\n"; else - cout << " Associativity: " << - g_ip->tag_assoc << endl; + cout << " Associativity: " << g_ip->tag_assoc << endl; } - cout << " Block size (bytes): " << g_ip->line_sz << endl; - cout << " Read/write Ports: " << - g_ip->num_rw_ports << endl; - cout << " Read ports: " << - g_ip->num_rd_ports << endl; - cout << " Write ports: " << - g_ip->num_wr_ports << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) - cout << " search ports: " << - g_ip->num_search_ports << endl; - cout << " Technology size (nm): " << - g_ip->F_sz_nm << endl << endl; - - - cout << " Access time (ns): " << fr->access_time*1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->access_time*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << " Read/write Ports: " << g_ip->num_rw_ports << endl; + cout << " Read ports: " << g_ip->num_rd_ports << endl; + cout << " Write ports: " << g_ip->num_wr_ports << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) + cout << " search ports: " << g_ip->num_search_ports << endl; + cout << " Technology size (nm): " << g_ip->F_sz_nm << endl << endl; + + cout << " Access time (ns): " << fr->access_time * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->access_time * 1e9 << " (@DVS_Level" << i + 1 + << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; } cout << endl; - cout << " Cycle time (ns): " << fr->cycle_time*1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->cycle_time*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - 
cout<< endl; - if (g_ip->data_arr_ram_cell_tech_type >= 4) { - cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; - cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; - cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; - cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; - cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; - cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; - cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; - cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; - cout << " Refresh power (mW): " << - fr->data_array2->refresh_power*1e3 << endl; + cout << " Cycle time (ns): " << fr->cycle_time * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->cycle_time * 1e9 << " (@DVS_Level" << i + 1 + << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; } - else { - if ((g_ip->fully_assoc|| g_ip->pure_cam)) - { - cout << " Total dynamic associative search energy per access (nJ): " << - fr->power.searchOp.dynamic*1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->power.searchOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - // cout << " Total dynamic read energy per access (nJ): " << - // fr->power.readOp.dynamic*1e9 << endl; - // cout << " Total dynamic write energy per access (nJ): " << - // fr->power.writeOp.dynamic*1e9 << endl; - } - // else - // { - cout << " Total dynamic read energy per access (nJ): " << - fr->power.readOp.dynamic*1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->power.readOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - cout << " Total 
dynamic write energy per access (nJ): " << - fr->power.writeOp.dynamic*1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->power.writeOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - // } - if (g_ip->power_gating) - { - cout << " Total leakage power of a bank, with power-gating "; - if (!g_ip->user_defined_vcc_underflow) - { - cout << "(state retained)"; - } - else - { - cout << "(non state retained)"; - } - - cout <<", including its network outside" //power gated with retaining memory content - " (mW): " << (g_ip->long_channel_device ? fr->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->power.readOp.power_gated_leakage)*1e3<long_channel_device ? fr->power.readOp.leakage*long_channel_leakage_reduction : fr->power.readOp.leakage)*1e3; - - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device ? fr->uca_q[i]->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->power.readOp.leakage)*1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << endl; + if (g_ip->data_arr_ram_cell_tech_type >= 4) { + cout << " Precharge Delay (ns): " + << fr->data_array2->precharge_delay * 1e9 << endl; + cout << " Activate Energy (nJ): " + << fr->data_array2->activate_energy * 1e9 << endl; + cout << " Read Energy (nJ): " << fr->data_array2->read_energy * 1e9 + << endl; + cout << " Write Energy (nJ): " << fr->data_array2->write_energy * 1e9 + << endl; + cout << " Precharge Energy (nJ): " + << fr->data_array2->precharge_energy * 1e9 << endl; + cout << " Leakage Power Closed Page (mW): " + << fr->data_array2->leak_power_subbank_closed_page * 1e3 << endl; + cout << " Leakage Power Open Page (mW): " + << fr->data_array2->leak_power_subbank_open_page * 1e3 << endl; + cout << " Leakage Power I/O (mW): " + << fr->data_array2->leak_power_request_and_reply_networks * 1e3 + << endl; + cout << " Refresh power (mW): " << 
fr->data_array2->refresh_power * 1e3 + << endl; + } else { + if ((g_ip->fully_assoc || g_ip->pure_cam)) { + cout << " Total dynamic associative search energy per access (nJ): " + << fr->power.searchOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->power.searchOp.dynamic * 1e9 << " (@DVS_Level" + << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; + } + cout << endl; + // cout << " Total dynamic read energy per access + //(nJ): " << fr->power.readOp.dynamic*1e9 << endl; + // cout << " Total dynamic write energy per access (nJ): " << + // fr->power.writeOp.dynamic*1e9 + //<< endl; + } + // else + // { + cout << " Total dynamic read energy per access (nJ): " + << fr->power.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->power.readOp.dynamic * 1e9 << " (@DVS_Level" + << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; + } + cout << endl; + cout << " Total dynamic write energy per access (nJ): " + << fr->power.writeOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->power.writeOp.dynamic * 1e9 << " (@DVS_Level" + << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; + } + cout << endl; + // } + if (g_ip->power_gating) { + cout << " Total leakage power of a bank, with power-gating "; + if (!g_ip->user_defined_vcc_underflow) { + cout << "(state retained)"; + } else { + cout << "(non state retained)"; + } + + cout << ", including its network outside" // power gated with retaining + // memory content + " (mW): " + << (g_ip->long_channel_device + ? fr->power.readOp.power_gated_leakage * + long_channel_leakage_reduction + : fr->power.readOp.power_gated_leakage) * + 1e3 + << endl; + } + cout << " Total leakage power of a bank without power gating, including " + "its network outside" + " (mW): " + << (g_ip->long_channel_device + ? 
fr->power.readOp.leakage * long_channel_leakage_reduction + : fr->power.readOp.leakage) * + 1e3; + + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? fr->uca_q[i]->power.readOp.leakage * + long_channel_leakage_reduction + : fr->uca_q[i]->power.readOp.leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; } - if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4) - { + if (g_ip->data_arr_ram_cell_tech_type == 3 || + g_ip->data_arr_ram_cell_tech_type == 4) { } - cout << " Cache height x width (mm): " << - fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl; + cout << " Cache height x width (mm): " << fr->cache_ht * 1e-3 << " x " + << fr->cache_len * 1e-3 << endl; cout << endl; - if (g_ip->power_gating) - { - /* Energy/Power stats */ - cout << " Power-gating results (The virtual power supply for gated circuit can only retain the state of idle circuit, not for operating the circuit):" << endl; - /* Data array power-gating stats */ - if (g_ip->user_defined_vcc_underflow) - { - cout<<" Warning: user defined power gating voltage is too low to retain state; Please understand the implications of deep sleep state on non state retaining and cold start effects when waking up the structure."<cache_ht*fr->cache_len/fr->uca_pg_reference->cache_ht/fr->uca_pg_reference->cache_len-1)*100;//% - cout << " \tPower gating circuits (sleep transistors) induced area overhead: " << - areaoverhead << " % " << endl ; - wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy - + fr->data_array2->wl_sleep_wakeup_energy - + fr->data_array2->bl_floating_wakeup_energy; - wakeup_T = wakeup_T_data=MAX(fr->data_array2->sram_sleep_wakeup_latency, - MAX(fr->data_array2->wl_sleep_wakeup_latency,fr->data_array2->bl_floating_wakeup_latency)); - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - 
wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy - + fr->tag_array2->wl_sleep_wakeup_energy - + fr->tag_array2->bl_floating_wakeup_energy; - wakeup_T_tag=MAX(fr->tag_array2->sram_sleep_wakeup_latency, - MAX(fr->tag_array2->wl_sleep_wakeup_latency,fr->tag_array2->bl_floating_wakeup_latency)); - - wakeup_E += wakeup_E_tag; - wakeup_T = MAX(wakeup_T_tag, wakeup_T_data); - - } - cout << " \tPower gating Wakeup Latency (ns): " << - wakeup_T*1e9 << endl ; - cout << " \tPower gating Wakeup Energy (nJ): " << - wakeup_E*1e9 << endl ; + if (g_ip->power_gating) { + /* Energy/Power stats */ + cout << " Power-gating results (The virtual power supply for gated " + "circuit can only retain the state of idle circuit, not for " + "operating the circuit):" + << endl; + /* Data array power-gating stats */ + if (g_ip->user_defined_vcc_underflow) { + cout << " Warning: user defined power gating voltage is too low to " + "retain state; Please understand the implications of deep sleep " + "state on non state retaining and cold start effects when waking " + "up the structure." 
+ << endl; + } else { + cout << " Power-gating results when retaining state" << endl; + } + areaoverhead = + (fr->cache_ht * fr->cache_len / fr->uca_pg_reference->cache_ht / + fr->uca_pg_reference->cache_len - + 1) * + 100; //% + cout << " \tPower gating circuits (sleep transistors) induced area " + "overhead: " + << areaoverhead << " % " << endl; + wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy + + fr->data_array2->wl_sleep_wakeup_energy + + fr->data_array2->bl_floating_wakeup_energy; + wakeup_T = wakeup_T_data = + MAX(fr->data_array2->sram_sleep_wakeup_latency, + MAX(fr->data_array2->wl_sleep_wakeup_latency, + fr->data_array2->bl_floating_wakeup_latency)); + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy + + fr->tag_array2->wl_sleep_wakeup_energy + + fr->tag_array2->bl_floating_wakeup_energy; + wakeup_T_tag = MAX(fr->tag_array2->sram_sleep_wakeup_latency, + MAX(fr->tag_array2->wl_sleep_wakeup_latency, + fr->tag_array2->bl_floating_wakeup_latency)); + + wakeup_E += wakeup_E_tag; + wakeup_T = MAX(wakeup_T_tag, wakeup_T_data); + } + cout << " \tPower gating Wakeup Latency (ns): " << wakeup_T * 1e9 + << endl; + cout << " \tPower gating Wakeup Energy (nJ): " << wakeup_E * 1e9 << endl; } - cout <data_array2->Ndwl << endl; cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; cout << " Best Nspd : " << fr->data_array2->Nspd << endl; cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; - cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; + cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl + << endl; - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { cout << " Best Ntwl : " << 
fr->tag_array2->Ndwl << endl; cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; @@ -2078,1493 +1969,1906 @@ bool dvs = !g_ip->dvs_voltage.empty(); switch (fr->data_array2->wt) { case (0): - cout << " Data array, H-tree wire type: Delay optimized global wires\n"; + cout + << " Data array, H-tree wire type: Delay optimized global wires\n"; break; case (1): - cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; + cout << " Data array, H-tree wire type: Global wires with 5\% delay " + "penalty\n"; break; case (2): - cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; + cout << " Data array, H-tree wire type: Global wires with 10\% delay " + "penalty\n"; break; case (3): - cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; + cout << " Data array, H-tree wire type: Global wires with 20\% delay " + "penalty\n"; break; case (4): - cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; + cout << " Data array, H-tree wire type: Global wires with 30\% delay " + "penalty\n"; break; case (5): - cout << " Data array, wire type: Low swing wires\n"; + cout << " Data array, wire type: Low swing wires\n"; break; default: - cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <data_array2->wt << endl; exit(0); } - if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) { + if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { switch (fr->tag_array2->wt) { case (0): - cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; + cout << " Tag array, H-tree wire type: Delay optimized global " + "wires\n"; break; case (1): - cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; + cout << " Tag array, H-tree wire type: Global wires with 5\% delay " + "penalty\n"; break; case (2): - cout << " Tag array, H-tree wire type: Global wires with 10\% delay 
penalty\n"; + cout << " Tag array, H-tree wire type: Global wires with 10\% delay " + "penalty\n"; break; case (3): - cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; + cout << " Tag array, H-tree wire type: Global wires with 20\% delay " + "penalty\n"; break; case (4): - cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; + cout << " Tag array, H-tree wire type: Global wires with 30\% delay " + "penalty\n"; break; case (5): - cout << " Tag array, wire type: Low swing wires\n"; + cout << " Tag array, wire type: Low swing wires\n"; break; default: - cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <tag_array2->wt << endl; exit(-1); } } - if (g_ip->print_detail) - { - //if(g_ip->fully_assoc) return; -if(0){ //detailed power-gating output - if (g_ip->power_gating) - { - /* Energy/Power stats */ - cout << endl << endl << "Power-gating Components:" << endl << endl; - /* Data array power-gating stats */ - areaoverhead = fr->cache_ht*fr->cache_len/fr->uca_pg_reference->cache_ht/fr->uca_pg_reference->cache_len-1; - cout << " Power gating circuits (sleep transistors) induced area overhead: " << - areaoverhead << " % " << endl ; - wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy - + fr->data_array2->wl_sleep_wakeup_energy - + fr->data_array2->bl_floating_wakeup_energy; - wakeup_T = wakeup_T_data=MAX(fr->data_array2->sram_sleep_wakeup_latency, - MAX(fr->data_array2->wl_sleep_wakeup_latency,fr->data_array2->bl_floating_wakeup_latency)); - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy - + fr->tag_array2->wl_sleep_wakeup_energy - + fr->tag_array2->bl_floating_wakeup_energy; - wakeup_T_tag=MAX(fr->tag_array2->sram_sleep_wakeup_latency, - MAX(fr->tag_array2->wl_sleep_wakeup_latency,fr->tag_array2->bl_floating_wakeup_latency)); - - wakeup_E += wakeup_E_tag; - wakeup_T = MAX(wakeup_T_tag, 
wakeup_T_data); - - } - cout << " Power gating Wakeup Latency (ns): " << - wakeup_T*1e9 << endl ; - cout << " Power gating Wakeup Energy (nJ): " << - wakeup_E*1e9 << endl ; - - -//extra power gating details - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - cout << " Data array: " << endl; - else if (g_ip->pure_cam) - cout << " CAM array: " << endl; - else - cout << " Fully associative cache array: " << endl; - - cout << "\t Sub-array Sleep Tx size (um) - " << - fr->data_array2->sram_sleep_tx_width << endl; - - // cout << "\t Sub-array Sleep Tx total size (um) - " << - // fr->data_array2->sram_sleep_tx_width << endl; - - cout << "\t Sub-array Sleep Tx total area (mm^2) - " << - fr->data_array2->sram_sleep_tx_area*1e-6 << endl; - - cout << "\t Sub-array wakeup time (ns) - " << - fr->data_array2->sram_sleep_wakeup_latency*1e9 << endl; - - cout << "\t Sub-array Tx energy (nJ) - " << - fr->data_array2->sram_sleep_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - cout << endl; - cout << "\t WL Sleep Tx size (um) - " << - fr->data_array2->wl_sleep_tx_width << endl; - - // cout << "\t WL Sleep total Tx size (um) - " << - // fr->data_array2->wl_sleep_tx_width << endl; - - cout << "\t WL Sleep Tx total area (mm^2) - " << - fr->data_array2->wl_sleep_tx_area*1e-6 << endl; - - cout << "\t WL wakeup time (ns) - " << - fr->data_array2->wl_sleep_wakeup_latency*1e9 << endl; - - cout << "\t WL Tx energy (nJ) - " << - fr->data_array2->wl_sleep_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - cout << endl; - cout << "\t BL floating wakeup time (ns) - " << - fr->data_array2->bl_floating_wakeup_latency*1e9 << endl; - - cout << "\t BL floating Tx energy (nJ) - " << - fr->data_array2->bl_floating_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - cout << endl; - - cout << "\t Active mats per access - " << 
fr->data_array2->num_active_mats<data_array2->num_submarray_mats<pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << " Tag array: " << endl; - cout << "\t Sub-array Sleep Tx size (um) - " << - fr->tag_array2->sram_sleep_tx_width << endl; - - // cout << "\t Sub-array Sleep Tx total size (um) - " << - // fr->tag_array2->sram_sleep_tx_width << endl; - - cout << "\t Sub-array Sleep Tx total area (mm^2) - " << - fr->tag_array2->sram_sleep_tx_area*1e-6 << endl; - - cout << "\t Sub-array wakeup time (ns) - " << - fr->tag_array2->sram_sleep_wakeup_latency*1e9 << endl; - - cout << "\t Sub-array Tx energy (nJ) - " << - fr->tag_array2->sram_sleep_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - cout << endl; - cout << "\t WL Sleep Tx size (um) - " << - fr->tag_array2->wl_sleep_tx_width << endl; - - // cout << "\t WL Sleep total Tx size (um) - " << - // fr->tag_array2->wl_sleep_tx_width << endl; - - cout << "\t WL Sleep Tx total area (mm^2) - " << - fr->tag_array2->wl_sleep_tx_area*1e-6 << endl; - - cout << "\t WL wakeup time (ns) - " << - fr->tag_array2->wl_sleep_wakeup_latency*1e9 << endl; - - cout << "\t WL Tx energy (nJ) - " << - fr->tag_array2->wl_sleep_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - cout << endl; - cout << "\t BL floating wakeup time (ns) - " << - fr->tag_array2->bl_floating_wakeup_latency*1e9 << endl; - - cout << "\t BL floating Tx energy (nJ) - " << - fr->tag_array2->bl_floating_wakeup_energy*1e9 << endl; - //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - cout << endl; - - cout << "\t Active mats per access - " << fr->tag_array2->num_active_mats<tag_array2->num_submarray_mats<print_detail) { + // if(g_ip->fully_assoc) return; + if (0) { // detailed power-gating output + if (g_ip->power_gating) { + /* Energy/Power stats */ + cout << endl << endl << "Power-gating Components:" << endl << endl; + /* Data array power-gating 
stats */ + areaoverhead = fr->cache_ht * fr->cache_len / + fr->uca_pg_reference->cache_ht / + fr->uca_pg_reference->cache_len - + 1; + cout << " Power gating circuits (sleep transistors) induced area " + "overhead: " + << areaoverhead << " % " << endl; + wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy + + fr->data_array2->wl_sleep_wakeup_energy + + fr->data_array2->bl_floating_wakeup_energy; + wakeup_T = wakeup_T_data = + MAX(fr->data_array2->sram_sleep_wakeup_latency, + MAX(fr->data_array2->wl_sleep_wakeup_latency, + fr->data_array2->bl_floating_wakeup_latency)); + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy + + fr->tag_array2->wl_sleep_wakeup_energy + + fr->tag_array2->bl_floating_wakeup_energy; + wakeup_T_tag = MAX(fr->tag_array2->sram_sleep_wakeup_latency, + MAX(fr->tag_array2->wl_sleep_wakeup_latency, + fr->tag_array2->bl_floating_wakeup_latency)); + + wakeup_E += wakeup_E_tag; + wakeup_T = MAX(wakeup_T_tag, wakeup_T_data); + } + cout << " Power gating Wakeup Latency (ns): " << wakeup_T * 1e9 + << endl; + cout << " Power gating Wakeup Energy (nJ): " << wakeup_E * 1e9 + << endl; + + // extra power gating details + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: " << endl; + else if (g_ip->pure_cam) + cout << " CAM array: " << endl; + else + cout << " Fully associative cache array: " << endl; + + cout << "\t Sub-array Sleep Tx size (um) - " + << fr->data_array2->sram_sleep_tx_width << endl; + + // cout << "\t Sub-array Sleep Tx total size (um) - " << + // fr->data_array2->sram_sleep_tx_width << endl; + + cout << "\t Sub-array Sleep Tx total area (mm^2) - " + << fr->data_array2->sram_sleep_tx_area * 1e-6 << endl; + + cout << "\t Sub-array wakeup time (ns) - " + << fr->data_array2->sram_sleep_wakeup_latency * 1e9 << endl; + + cout << "\t Sub-array Tx energy (nJ) - " + << fr->data_array2->sram_sleep_wakeup_energy * 1e9 << 
endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t WL Sleep Tx size (um) - " + << fr->data_array2->wl_sleep_tx_width << endl; + + // cout << "\t WL Sleep total Tx size (um) - " << + // fr->data_array2->wl_sleep_tx_width << endl; + + cout << "\t WL Sleep Tx total area (mm^2) - " + << fr->data_array2->wl_sleep_tx_area * 1e-6 << endl; + + cout << "\t WL wakeup time (ns) - " + << fr->data_array2->wl_sleep_wakeup_latency * 1e9 << endl; + + cout << "\t WL Tx energy (nJ) - " + << fr->data_array2->wl_sleep_wakeup_energy * 1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t BL floating wakeup time (ns) - " + << fr->data_array2->bl_floating_wakeup_latency * 1e9 << endl; + + cout << "\t BL floating Tx energy (nJ) - " + << fr->data_array2->bl_floating_wakeup_energy * 1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + cout << endl; + + cout << "\t Active mats per access - " + << fr->data_array2->num_active_mats << endl; + cout << "\t Active subarrays per mat - " + << fr->data_array2->num_submarray_mats << endl; + cout << endl; + /* Tag array area stats */ + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << " Tag array: " << endl; + cout << "\t Sub-array Sleep Tx size (um) - " + << fr->tag_array2->sram_sleep_tx_width << endl; + + // cout << "\t Sub-array Sleep Tx total size (um) - " << + // fr->tag_array2->sram_sleep_tx_width << endl; + + cout << "\t Sub-array Sleep Tx total area (mm^2) - " + << fr->tag_array2->sram_sleep_tx_area * 1e-6 << endl; + + cout << "\t Sub-array wakeup time (ns) - " + << fr->tag_array2->sram_sleep_wakeup_latency * 1e9 << endl; + + cout << "\t Sub-array Tx energy (nJ) - " + << fr->tag_array2->sram_sleep_wakeup_energy * 1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t WL Sleep Tx size (um) - " + << 
fr->tag_array2->wl_sleep_tx_width << endl; + + // cout << "\t WL Sleep total Tx size (um) - " << + // fr->tag_array2->wl_sleep_tx_width << endl; + + cout << "\t WL Sleep Tx total area (mm^2) - " + << fr->tag_array2->wl_sleep_tx_area * 1e-6 << endl; + + cout << "\t WL wakeup time (ns) - " + << fr->tag_array2->wl_sleep_wakeup_latency * 1e9 << endl; + + cout << "\t WL Tx energy (nJ) - " + << fr->tag_array2->wl_sleep_wakeup_energy * 1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t BL floating wakeup time (ns) - " + << fr->tag_array2->bl_floating_wakeup_latency * 1e9 << endl; + + cout << "\t BL floating Tx energy (nJ) - " + << fr->tag_array2->bl_floating_wakeup_energy * 1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + + cout << "\t Active mats per access - " + << fr->tag_array2->num_active_mats << endl; + cout << "\t Active subarrays per mat - " + << fr->tag_array2->num_submarray_mats << endl; + cout << endl; + } + } + } /* Delay stats */ /* data array stats */ cout << endl << "Time Components:" << endl << endl; - cout << " Data side (with Output driver) (ns): " << - fr->data_array2->access_time/1e-9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->access_time/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << " Data side (with Output driver) (ns): " + << fr->data_array2->access_time / 1e-9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->access_time / 1e-9 << " (@DVS_Level" + << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; + } + cout << endl; + + cout << "\tH-tree delay outside banks (ns): " + << fr->data_array2->delay_route_to_bank * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_route_to_bank * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << 
g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tH-tree input delay (inside a bank) (ns): " + << fr->data_array2->delay_input_htree * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_input_htree * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << "\tDecoder + wordline delay (ns): " + << fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->data_array2->delay_row_decoder * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2 + ->delay_row_predecode_driver_and_block * + 1e9 + + fr->uca_q[i]->data_array2->delay_row_decoder * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + } else { + cout << "\tCAM search delay (ns): " + << fr->data_array2->delay_matchlines * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_matchlines * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; } - cout<< endl; - cout << "\tH-tree delay outside banks (ns): " << - fr->data_array2->delay_route_to_bank * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_route_to_bank * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tBitline delay (ns): " << fr->data_array2->delay_bitlines / 1e-9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_bitlines / 1e-9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSense Amplifier delay (ns): " + << fr->data_array2->delay_sense_amp * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for 
(i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_sense_amp * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tH-tree output delay (inside a bank) (ns): " + << fr->data_array2->delay_subarray_output_driver * 1e9 + + fr->data_array2->delay_dout_htree * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->delay_subarray_output_driver * 1e9 + + fr->uca_q[i]->data_array2->delay_dout_htree * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + if (g_ip->power_gating) { + cout << "\tPower gating wakeup time (ns) - " << wakeup_T_data * 1e9 + << endl; + } + + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + /* tag array stats */ + cout << endl + << " Tag side (with Output driver) (ns): " + << fr->tag_array2->access_time / 1e-9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->access_time / 1e-9 << " (@DVS_Level" + << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] << "); "; + } + cout << endl; + cout << "\tH-tree delay outside banks (ns): " + << fr->tag_array2->delay_route_to_bank * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_route_to_bank * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tH-tree input delay (inside a bank) (ns): " << - fr->data_array2->delay_input_htree * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_input_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tH-tree input delay (inside a bank) (ns): " + << fr->tag_array2->delay_input_htree * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < 
dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_input_htree * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << "\tDecoder + wordline delay (ns): " << - fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->data_array2->delay_row_decoder * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->uca_q[i]->data_array2->delay_row_decoder * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tDecoder + wordline delay (ns): " + << fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->tag_array2->delay_row_decoder * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2 + ->delay_row_predecode_driver_and_block * + 1e9 + + fr->uca_q[i]->tag_array2->delay_row_decoder * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tBitline delay (ns): " << fr->tag_array2->delay_bitlines / 1e-9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_bitlines * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSense Amplifier delay (ns): " + << fr->tag_array2->delay_sense_amp * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_sense_amp * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tComparator delay (ns): " + << fr->tag_array2->delay_comparator * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_comparator * 1e9 + << " (@DVS_Level" << i + 1 << 
"_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tH-tree output delay (inside a bank) (ns): " + << fr->tag_array2->delay_subarray_output_driver * 1e9 + + fr->tag_array2->delay_dout_htree * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->delay_subarray_output_driver * 1e9 + + fr->uca_q[i]->tag_array2->delay_dout_htree * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + if (g_ip->power_gating) { + cout << "\tPower gating wakeup time (ns) - " << wakeup_T_tag * 1e9 + << endl; + } } - else - { - cout << "\tCAM search delay (ns): " << - fr->data_array2->delay_matchlines * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_matchlines * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + + /* Energy/Power stats */ + cout << endl << endl << "Power Components:" << endl << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << " Data array: Total dynamic read energy/access (nJ): " + << fr->data_array2->power.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power.readOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + if (g_ip->power_gating) { + cout << "\tTotal leakage power of a bank, power gated "; + + if (!g_ip->user_defined_vcc_underflow) { + cout << "with "; + } else { + cout << "without "; } - cout<< endl; - } + cout << "retaining memory content, including its network outside (mW): " + << (g_ip->long_channel_device + ? 
fr->data_array2->power.readOp.power_gated_leakage * + long_channel_leakage_reduction + : fr->data_array2->power.readOp.power_gated_leakage) * + 1e3 + << endl; + } + // else + // { + cout << "\tTotal leakage power of a bank without power gating, including " + "its network outside (mW): " + << (g_ip->long_channel_device + ? fr->data_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->data_array2->power.readOp.leakage) * + 1e3; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? fr->uca_q[i]->data_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->uca_q[i]->data_array2->power.readOp.leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tBitline delay (ns): " << - fr->data_array2->delay_bitlines/1e-9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_bitlines/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + // } - cout << "\tSense Amplifier delay (ns): " << - fr->data_array2->delay_sense_amp * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_sense_amp*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tTotal energy in H-tree outside banks (that includes both " + "address and data transfer) (nJ): " + << (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_routing_to_bank.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tInput H-tree inside bank Energy (nJ): " + << (fr->data_array2->power_addr_input_htree.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << 
fr->uca_q[i] + ->data_array2->power_addr_input_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tH-tree output delay (inside a bank) (ns): " << - fr->data_array2->delay_subarray_output_driver * 1e9 + - fr->data_array2->delay_dout_htree * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->delay_subarray_output_driver * 1e9 + - fr->uca_q[i]->data_array2->delay_dout_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tOutput Htree inside bank Energy (nJ): " + << fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << fr->uca_q[i] + ->data_array2->power_data_output_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - cout<< endl; - if (g_ip->power_gating) - { - cout << "\tPower gating wakeup time (ns) - " << - wakeup_T_data*1e9 << endl; - } + cout << endl; - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - /* tag array stats */ - cout << endl << " Tag side (with Output driver) (ns): " << - fr->tag_array2->access_time/1e-9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->access_time/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tDecoder (nJ): " + << fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * + 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->data_array2->power_row_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_row_predecoder_blocks.readOp + .dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tH-tree 
delay outside banks (ns): " << - fr->tag_array2->delay_route_to_bank * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_route_to_bank * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + + cout << "\tWordline (nJ): " + << fr->data_array2->power_row_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_row_decoders.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tH-tree input delay (inside a bank) (ns): " << - fr->tag_array2->delay_input_htree * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_input_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tBitline mux & associated drivers (nJ): " + << fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * + 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << (fr->uca_q[i] + ->data_array2->power_bit_mux_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_bit_mux_decoders.readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tDecoder + wordline delay (ns): " << - fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->tag_array2->delay_row_decoder * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->uca_q[i]->tag_array2->delay_row_decoder * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tSense 
amp mux & associated drivers (nJ): " + << fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp + .dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp + .dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_1_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_1_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_senseamp_mux_lev_1_decoders + .readOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_2_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_2_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_senseamp_mux_lev_2_decoders + .readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tBitline delay (ns): " << - fr->tag_array2->delay_bitlines/1e-9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_bitlines * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tBitlines precharge and equalization circuit (nJ): " + << fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << fr->uca_q[i] + ->data_array2->power_prechg_eq_drivers.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << 
"\tSense Amplifier delay (ns): " << - fr->tag_array2->delay_sense_amp * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_sense_amp * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tBitlines (nJ): " + << fr->data_array2->power_bitlines.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_bitlines.readOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tComparator delay (ns): " << - fr->tag_array2->delay_comparator * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_comparator * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tSense amplifier energy (nJ): " + << fr->data_array2->power_sense_amps.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_sense_amps.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - cout << "\tH-tree output delay (inside a bank) (ns): " << - fr->tag_array2->delay_subarray_output_driver * 1e9 + - fr->tag_array2->delay_dout_htree * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->delay_subarray_output_driver * 1e9 + - fr->uca_q[i]->tag_array2->delay_dout_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tSub-array output driver (nJ): " + << fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_output_drivers_at_subarray.readOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } cout << endl; - if (g_ip->power_gating) - { - cout 
<< "\tPower gating wakeup time (ns) - " << - wakeup_T_tag*1e9 << endl; + + if (g_ip->power_gating) { + cout << "\tTotal leakage power in H-tree outside a bank when power " + "gated (that includes both " + "address and data network) ((mW)): " + << (g_ip->long_channel_device + ? fr->data_array2->power_routing_to_bank.readOp + .power_gated_leakage * + long_channel_leakage_reduction + : fr->data_array2->power_routing_to_bank.readOp + .power_gated_leakage) * + 1e3 + << endl; + } + // else + // { + cout << "\tTotal leakage power in H-tree outside a bank (that includes " + "both " + "address and data network) ((mW)): " + << (g_ip->long_channel_device + ? fr->data_array2->power_routing_to_bank.readOp.leakage * + long_channel_leakage_reduction + : fr->data_array2->power_routing_to_bank.readOp.leakage) * + 1e3; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? fr->uca_q[i] + ->data_array2->power_routing_to_bank.readOp + .leakage * + long_channel_leakage_reduction + : fr->uca_q[i] + ->data_array2->power_routing_to_bank.readOp + .leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - } + cout << endl; + // } + // cout << "\tTotal leakage power in H-tree (that includes both " + // "address and data network) ((mW)): " << + // (fr->data_array2->power_addr_input_htree.readOp.leakage + // + fr->data_array2->power_data_output_htree.readOp.leakage + + // fr->data_array2->power_routing_to_bank.readOp.leakage) + // * 1e3 << endl; + + // cout << "\tTotal leakage power in cells (mW): " << + // (fr->data_array2->array_leakage) * 1e3 << endl; + // cout << "\tTotal leakage power in row logic(mW): " << + // (fr->data_array2->wl_leakage) * 1e3 << endl; + // cout << "\tTotal leakage power in column logic(mW): " << + // (fr->data_array2->cl_leakage) * 1e3 << endl; + // + // cout << "\tTotal gate leakage power in H-tree (that includes + // both " "address and data network) ((mW)): " << 
+ // (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + // + fr->data_array2->power_data_output_htree.readOp.gate_leakage + + // fr->data_array2->power_routing_to_bank.readOp.gate_leakage) + // * 1e3 << endl; + } + + if (g_ip->pure_cam || g_ip->fully_assoc) { + + if (g_ip->pure_cam) + cout << " CAM array:" << endl; + // cout << " Total dynamic associative search + // energy/access (nJ): " << + // fr->data_array2->power.searchOp.dynamic * 1e9 << + // endl; + // cout << "\tTotal energy in H-tree (that includes both " + // "match key and data transfer) (nJ): " << + // (fr->data_array2->power_htree_in_search.searchOp.dynamic + // + fr->data_array2->power_htree_out_search.searchOp.dynamic + + // fr->data_array2->power_routing_to_bank.searchOp.dynamic) + // * 1e9 << endl; cout << "\tKeyword input and result + // output Htrees inside bank Energy (nJ): " << + // (fr->data_array2->power_htree_in_search.searchOp.dynamic + // + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << + // endl; cout << "\tSearchlines (nJ): " << + // fr->data_array2->power_searchline.searchOp.dynamic + // * 1e9 + + // fr->data_array2->power_searchline_precharge.searchOp.dynamic + // * 1e9 << endl; cout << "\tMatchlines (nJ): " << + // fr->data_array2->power_matchlines.searchOp.dynamic + // * 1e9 + + // fr->data_array2->power_matchline_precharge.searchOp.dynamic + // * 1e9 << endl; cout << "\tSub-array output driver (nJ): + // " << + // fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic + // * 1e9 << endl; + // + // + // cout <data_array2->power.readOp.dynamic * 1e9 + // << endl; + // cout << "\tTotal energy in H-tree (that includes both " + // "address and data transfer) (nJ): " << + // (fr->data_array2->power_addr_input_htree.readOp.dynamic + // + fr->data_array2->power_data_output_htree.readOp.dynamic + + // fr->data_array2->power_routing_to_bank.readOp.dynamic) + // * 1e9 << endl; cout << "\tOutput Htree inside bank + // Energy (nJ): " << + // 
fr->data_array2->power_data_output_htree.readOp.dynamic + // * 1e9 << endl; cout << "\tDecoder (nJ): " << + // fr->data_array2->power_row_predecoder_drivers.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_row_predecoder_blocks.readOp.dynamic + // * 1e9 << endl; cout << "\tWordline (nJ): " << + // fr->data_array2->power_row_decoders.readOp.dynamic + // * 1e9 << endl; cout << "\tBitline mux & associated + // drivers (nJ): " + // << + // fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 + // << endl; cout << "\tSense amp mux & associated drivers + // (nJ): " << + // fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic + // * 1e9 << endl; cout << "\tBitlines (nJ): " << + // fr->data_array2->power_bitlines.readOp.dynamic + // * 1e9 + + // fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< + // endl; cout << "\tSense amplifier energy (nJ): " << + // fr->data_array2->power_sense_amps.readOp.dynamic + // * 1e9 << endl; cout << "\tSub-array output driver (nJ): + // " << + // fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic + // * 1e9 << endl; + // + // cout << endl <<" Total leakage power of a bank (mW): " + // << + // fr->data_array2->power.readOp.leakage * 1e3 << + // endl; + // } + // else + // { + if (g_ip->fully_assoc) + cout << " Fully associative array:" << endl; + + cout << " Total dynamic 
associative search energy/access (nJ): " + << fr->data_array2->power.searchOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power.searchOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tTotal energy in H-tree outside banks(that includes both " + "match key and data transfer) (nJ): " + << (fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << fr->uca_q[i] + ->data_array2->power_routing_to_bank.searchOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - /* Energy/Power stats */ - cout << endl << endl << "Power Components:" << endl << endl; + cout << "\tMatch Key input Htrees inside bank Energy (nJ): " + << (fr->data_array2->power_htree_in_search.searchOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << fr->uca_q[i] + ->data_array2->power_htree_in_search.searchOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << " Data array: Total dynamic read energy/access (nJ): " << - fr->data_array2->power.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - if (g_ip->power_gating) - { - cout << "\tTotal leakage power of a bank, power gated "; - - if (!g_ip->user_defined_vcc_underflow) - { - cout << "with "; - } - else - { - cout << "without "; - } - cout<<"retaining memory content, including its network outside (mW): " << - (g_ip->long_channel_device ? 
fr->data_array2->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.power_gated_leakage)*1e3 << endl; - } -// else -// { - cout << "\tTotal leakage power of a bank without power gating, including its network outside (mW): " << - (g_ip->long_channel_device ? fr->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.leakage)*1e3; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device ?fr->uca_q[i]->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->data_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - -// } - - cout << "\tTotal energy in H-tree outside banks (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tInput H-tree inside bank Energy (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic) * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_addr_input_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tResult output Htrees inside bank Energy (nJ): " + << (fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_htree_out_search.searchOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tSearchlines (nJ): " + << fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + 
fr->data_array2->power_searchline_precharge.searchOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_searchline.searchOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_searchline_precharge.searchOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 
1e9 + - fr->uca_q[i]->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9) - <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9) - <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitlines precharge and equalization circuit (nJ): " << - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_bitlines.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << 
g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - - if (g_ip->power_gating) - { - cout << "\tTotal leakage power in H-tree outside a bank when power gated (that includes both " - "address and data network) ((mW)): " << - (g_ip->long_channel_device?fr->data_array2->power_routing_to_bank.readOp.power_gated_leakage * long_channel_leakage_reduction: fr->data_array2->power_routing_to_bank.readOp.power_gated_leakage) * 1e3 << endl; - } -// else -// { - cout << "\tTotal leakage power in H-tree outside a bank (that includes both " - "address and data network) ((mW)): " << - (g_ip->long_channel_device? fr->data_array2->power_routing_to_bank.readOp.leakage * long_channel_leakage_reduction: fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device? 
fr->uca_q[i]->data_array2->power_routing_to_bank.readOp.leakage* long_channel_leakage_reduction: fr->uca_q[i]->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - -// } - // cout << "\tTotal leakage power in H-tree (that includes both " - // "address and data network) ((mW)): " << - // (fr->data_array2->power_addr_input_htree.readOp.leakage + - // fr->data_array2->power_data_output_htree.readOp.leakage + - // fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; - - // cout << "\tTotal leakage power in cells (mW): " << - // (fr->data_array2->array_leakage) * 1e3 << endl; - // cout << "\tTotal leakage power in row logic(mW): " << - // (fr->data_array2->wl_leakage) * 1e3 << endl; - // cout << "\tTotal leakage power in column logic(mW): " << - // (fr->data_array2->cl_leakage) * 1e3 << endl; - // - // cout << "\tTotal gate leakage power in H-tree (that includes both " - // "address and data network) ((mW)): " << - // (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + - // fr->data_array2->power_data_output_htree.readOp.gate_leakage + - // fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; - } - - if (g_ip->pure_cam||g_ip->fully_assoc) - { - - if (g_ip->pure_cam) cout << " CAM array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; - // cout << "\tTotal energy in H-tree (that includes both " - // "match key and data transfer) (nJ): " << - // (fr->data_array2->power_htree_in_search.searchOp.dynamic + - // fr->data_array2->power_htree_out_search.searchOp.dynamic + - // fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; - // cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << - // (fr->data_array2->power_htree_in_search.searchOp.dynamic + - // fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; - // cout << "\tSearchlines (nJ): " << - // 
fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - // fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; - // cout << "\tMatchlines (nJ): " << - // fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - // fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; - // cout << "\tSub-array output driver (nJ): " << - // fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - // - // - // cout <data_array2->power.readOp.dynamic * 1e9 << endl; - // cout << "\tTotal energy in H-tree (that includes both " - // "address and data transfer) (nJ): " << - // (fr->data_array2->power_addr_input_htree.readOp.dynamic + - // fr->data_array2->power_data_output_htree.readOp.dynamic + - // fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - // cout << "\tOutput Htree inside bank Energy (nJ): " << - // fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - // cout << "\tDecoder (nJ): " << - // fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - // fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - // cout << "\tWordline (nJ): " << - // fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - // cout << "\tBitline mux & associated drivers (nJ): " << - // fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - // fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - // fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - // cout << "\tSense amp mux & associated drivers (nJ): " << - // fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - // fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - // fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - // fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - // 
fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - // fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - // cout << "\tBitlines (nJ): " << - // fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - // fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; - // cout << "\tSense amplifier energy (nJ): " << - // fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - // cout << "\tSub-array output driver (nJ): " << - // fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - // - // cout << endl <<" Total leakage power of a bank (mW): " << - // fr->data_array2->power.readOp.leakage * 1e3 << endl; - // } - // else - // { - if (g_ip->fully_assoc) cout << " Fully associative array:"<data_array2->power.searchOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tTotal energy in H-tree outside banks(that includes both " - "match key and data transfer) (nJ): " << - (fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tMatch Key input Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic ) * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_htree_in_search.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tResult output Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; 
iuca_q[i]->data_array2->power_htree_out_search.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSearchlines (nJ): " << - fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tMatchlines (nJ): " << - fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tMatchlines (nJ): " + << fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_matchlines.searchOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_matchline_precharge.searchOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + if (g_ip->fully_assoc) { + cout << "\tData portion wordline (nJ): " + << fr->data_array2->power_matchline_to_wordline_drv.searchOp + .dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_matchline_to_wordline_drv + .searchOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - cout<< endl; - - if 
(g_ip->fully_assoc) - { - cout << "\tData portion wordline (nJ): " << - fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tData Bitlines (nJ): " << - fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_bitlines.searchOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_sense_amps.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - } - - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << endl; + + cout << "\tData Bitlines (nJ): " + << fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << (fr->uca_q[i]->data_array2->power_bitlines.searchOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_prechg_eq_drivers.searchOp + .dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - cout<< endl; - - cout 
<data_array2->power.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << endl; + + cout << "\tSense amplifier energy (nJ): " + << fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_sense_amps.searchOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - cout<< endl; + cout << endl; + } + + cout << "\tSub-array output driver (nJ): " + << fr->data_array2->power_output_drivers_at_subarray.searchOp + .dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_output_drivers_at_subarray + .searchOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << endl + << " Total dynamic read energy/access (nJ): " + << fr->data_array2->power.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power.readOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tTotal energy in H-tree outside banks(that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tInput Htree inside bank Energy (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic ) * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_addr_input_htree.readOp.dynamic * 
1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9) - <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - 
fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9) - <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->uca_q[i]->data_array2->power_prechg_eq_drivers.readOp.dynamic* 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" 
(@DVS_Level0); "; - for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; + cout << "\tTotal energy in H-tree outside banks(that includes both " + "address and data transfer) (nJ): " + << (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_routing_to_bank.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tInput Htree inside bank Energy (nJ): " + << (fr->data_array2->power_addr_input_htree.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_addr_input_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tOutput Htree inside bank Energy (nJ): " + << fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << fr->uca_q[i] + ->data_array2->power_data_output_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tDecoder (nJ): " + << fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * + 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->data_array2->power_row_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_row_predecoder_blocks.readOp + .dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << endl <<" Total leakage power of a bank, including its 
network outside (mW): " << - (g_ip->long_channel_device ? fr->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.leakage)*1e3; //CAM/FA does not support PG yet - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device ?fr->uca_q[i]->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->data_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; + cout << "\tWordline (nJ): " + << fr->data_array2->power_row_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_row_decoders.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tBitline mux & associated drivers (nJ): " + << fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * + 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout + << (fr->uca_q[i] + ->data_array2->power_bit_mux_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_bit_mux_decoders.readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSense amp mux & associated drivers (nJ): " + << fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp + .dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers + .readOp.dynamic * + 1e9 + + 
fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp + .dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_1_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_1_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_senseamp_mux_lev_1_decoders + .readOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_2_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2 + ->power_senseamp_mux_lev_2_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_senseamp_mux_lev_2_decoders + .readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tBitlines (nJ): " + << fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i]->data_array2->power_bitlines.readOp.dynamic * + 1e9 + + fr->uca_q[i] + ->data_array2->power_prechg_eq_drivers.readOp + .dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSense amplifier energy (nJ): " + << fr->data_array2->power_sense_amps.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->data_array2->power_sense_amps.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSub-array output driver (nJ): " + << fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i 
< dvs_levels; i++) + cout << fr->uca_q[i] + ->data_array2->power_output_drivers_at_subarray.readOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << endl + << " Total leakage power of a bank, including its network outside " + "(mW): " + << (g_ip->long_channel_device + ? fr->data_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->data_array2->power.readOp.leakage) * + 1e3; // CAM/FA does not support PG yet + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? fr->uca_q[i]->data_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->uca_q[i]->data_array2->power.readOp.leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + } + + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl + << " Tag array: Total dynamic read energy/access (nJ): " + << fr->tag_array2->power.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->power.readOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + if (g_ip->power_gating) { + cout << "\tTotal leakage power of a bank, power gated "; + if (!g_ip->user_defined_vcc_underflow) { + cout << "with "; + } else { + cout << "without "; } - cout<< endl; - - - } - - - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << - fr->tag_array2->power.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - if (g_ip->power_gating) - { - cout << "\tTotal leakage 
power of a bank, power gated "; - if (!g_ip->user_defined_vcc_underflow) - { - cout << "with "; - } - else - { - cout << "without "; - } - cout<<"retaining memory content, including its network outside (mW): " << - (g_ip->long_channel_device ? fr->tag_array2->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->tag_array2->power.readOp.power_gated_leakage)* 1e3 << endl; - } -// else -// { - cout << "\tTotal leakage power of a bank without power gating, including its network outside (mW): " << - (g_ip->long_channel_device ? fr->tag_array2->power.readOp.leakage * long_channel_leakage_reduction: fr->tag_array2->power.readOp.leakage)* 1e3; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device ? fr->uca_q[i]->tag_array2->power.readOp.leakage *long_channel_leakage_reduction: fr->uca_q[i]->tag_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - -// } -// cout << "\tTotal leakage read/write power of a bank (mW): " << -// fr->tag_array2->power.readOp.leakage * 1e3 << endl; + cout << "retaining memory content, including its network outside (mW): " + << (g_ip->long_channel_device + ? fr->tag_array2->power.readOp.power_gated_leakage * + long_channel_leakage_reduction + : fr->tag_array2->power.readOp.power_gated_leakage) * + 1e3 + << endl; + } + // else + // { + cout << "\tTotal leakage power of a bank without power gating, including " + "its network outside (mW): " + << (g_ip->long_channel_device + ? fr->tag_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->tag_array2->power.readOp.leakage) * + 1e3; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? 
fr->uca_q[i]->tag_array2->power.readOp.leakage * + long_channel_leakage_reduction + : fr->uca_q[i]->tag_array2->power.readOp.leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + // } + // cout << "\tTotal leakage read/write power of a bank (mW): " << + // fr->tag_array2->power.readOp.leakage * 1e3 << endl; cout << "\tTotal energy in H-tree outside banks (that includes both " - "address and data transfer) (nJ): " << - (fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - - cout << "\tInput H-tree inside banks Energy (nJ): " << - (fr->tag_array2->power_addr_input_htree.readOp.dynamic) * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_addr_input_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tOutput Htree inside a bank Energy (nJ): " << - fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tDecoder (nJ): " << - fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tWordline (nJ): " << - fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9; - if (dvs) - { - 
cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitline mux & associated drivers (nJ): " << - fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9) - <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9) - <<" 
(@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tBitlines precharge and equalization circuit (nJ): " << - fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - cout << "\tBitlines (nJ): " << - fr->tag_array2->power_bitlines.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_bitlines.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSense amplifier energy (nJ): " << - fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - cout << "\tSub-array output driver (nJ): " << - fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; iuca_q[i]->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; - - if (g_ip->power_gating) - { - cout << "\tTotal leakage power in H-tree outside a bank when power gated (that includes both " - "address and data network) ((mW)): " << - (g_ip->long_channel_device ? fr->tag_array2->power_routing_to_bank.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->tag_array2->power_routing_to_bank.readOp.power_gated_leakage) * 1e3 << endl; - } -// else -// { - cout << "\tTotal leakage power in H-tree outside a bank (that includes both " - "address and data network) without power gating((mW)): " << - (g_ip->long_channel_device ? 
fr->tag_array2->power_routing_to_bank.readOp.leakage*long_channel_leakage_reduction : fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3; - if (dvs) - { - cout<<" (@DVS_Level0); "; - for (i = 0; ilong_channel_device ? fr->uca_q[i]->tag_array2->power_routing_to_bank.readOp.leakage *long_channel_leakage_reduction : fr->uca_q[i]->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); "; - } - cout<< endl; -// } - -// cout << "\tTotal leakage power of a bank (mW): " << -// fr->tag_array2->power.readOp.leakage * 1e3 << endl; -// cout << "\tTotal leakage power in H-tree (that includes both " -// "address and data network) ((mW)): " << -// (fr->tag_array2->power_addr_input_htree.readOp.leakage + -// fr->tag_array2->power_data_output_htree.readOp.leakage + -// fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; -// -// cout << "\tTotal leakage power in cells (mW): " << -// (fr->tag_array2->array_leakage) * 1e3 << endl; -// cout << "\tTotal leakage power in row logic(mW): " << -// (fr->tag_array2->wl_leakage) * 1e3 << endl; -// cout << "\tTotal leakage power in column logic(mW): " << -// (fr->tag_array2->cl_leakage) * 1e3 << endl; -// cout << "\tTotal gate leakage power in H-tree (that includes both " -// "address and data network) ((mW)): " << -// (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + -// fr->tag_array2->power_data_output_htree.readOp.gate_leakage + -// fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; - } - - cout << endl << endl << "Area Components:" << endl << endl; - /* Data array area stats */ - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else if (g_ip->pure_cam) - cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else - cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + "address 
and data transfer) (nJ): " + << (fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->tag_array2->power_routing_to_bank.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - cout << "\tHeight (mm): " << - fr->data_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->data_array2->all_banks_width*1e-3 << endl; - if (g_ip->print_detail) { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->data_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->data_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->data_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->data_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->data_array2->subarray_length*1e-3 << endl; - if (g_ip->power_gating) - { - overhead_data = (fr->data_array2->area/fr->uca_pg_reference->data_array2->area-1)*100;//%; - cout << " Power gating circuits (sleep transistors) induced area overhead: " << overhead_data <<"%" <tag_array2->power_addr_input_htree.readOp.dynamic) * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->tag_array2->power_addr_input_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - } + cout << endl; - /* Tag array area stats */ - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; - cout << "\tHeight (mm): " << - fr->tag_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->tag_array2->all_banks_width*1e-3 << endl; + cout << "\tOutput Htree inside a bank Energy (nJ): " + << 
fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->tag_array2->power_data_output_htree.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; - if (g_ip->print_detail) - { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->tag_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->tag_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->tag_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->tag_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->tag_array2->subarray_length*1e-3 << endl; + cout << "\tDecoder (nJ): " + << fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * + 1e9 + + fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->tag_array2->power_row_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2->power_row_predecoder_blocks.readOp + .dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } + cout << endl; - if (g_ip->power_gating) - { - overhead_tag = (fr->tag_array2->area/fr->uca_pg_reference->tag_array2->area-1)*100;//%; - cout << " Power gating circuits (sleep transistors) induced area overhead: " << overhead_tag <<"%" <tag_array2->power_row_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->power_row_decoders.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; } - } + cout << endl; + cout << "\tBitline mux & associated drivers (nJ): " + << fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * + 1e9 + 
+ fr->tag_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->tag_array2->power_bit_mux_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2->power_bit_mux_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2->power_bit_mux_decoders.readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tSense amp mux & associated drivers (nJ): " + << fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp + .dynamic * + 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers + .readOp.dynamic * + 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks + .readOp.dynamic * + 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp + .dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (fr->uca_q[i] + ->tag_array2 + ->power_senseamp_mux_lev_1_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2 + ->power_senseamp_mux_lev_1_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2->power_senseamp_mux_lev_1_decoders + .readOp.dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2 + ->power_senseamp_mux_lev_2_predecoder_drivers.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2 + ->power_senseamp_mux_lev_2_predecoder_blocks.readOp + .dynamic * + 1e9 + + fr->uca_q[i] + ->tag_array2->power_senseamp_mux_lev_2_decoders + .readOp.dynamic * + 1e9) + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tBitlines precharge and equalization 
circuit (nJ): " + << fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->tag_array2->power_prechg_eq_drivers.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tBitlines (nJ): " + << fr->tag_array2->power_bitlines.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->power_bitlines.readOp.dynamic * 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + cout << "\tSense amplifier energy (nJ): " + << fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i]->tag_array2->power_sense_amps.readOp.dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + cout << "\tSub-array output driver (nJ): " + << fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * + 1e9; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << fr->uca_q[i] + ->tag_array2->power_output_drivers_at_subarray.readOp + .dynamic * + 1e9 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + + if (g_ip->power_gating) { + cout << "\tTotal leakage power in H-tree outside a bank when power " + "gated (that includes both " + "address and data network) ((mW)): " + << (g_ip->long_channel_device + ? 
fr->tag_array2->power_routing_to_bank.readOp + .power_gated_leakage * + long_channel_leakage_reduction + : fr->tag_array2->power_routing_to_bank.readOp + .power_gated_leakage) * + 1e3 + << endl; + } + // else + // { + cout << "\tTotal leakage power in H-tree outside a bank (that includes " + "both " + "address and data network) without power gating((mW)): " + << (g_ip->long_channel_device + ? fr->tag_array2->power_routing_to_bank.readOp.leakage * + long_channel_leakage_reduction + : fr->tag_array2->power_routing_to_bank.readOp.leakage) * + 1e3; + if (dvs) { + cout << " (@DVS_Level0); "; + for (i = 0; i < dvs_levels; i++) + cout << (g_ip->long_channel_device + ? fr->uca_q[i] + ->tag_array2->power_routing_to_bank.readOp + .leakage * + long_channel_leakage_reduction + : fr->uca_q[i] + ->tag_array2->power_routing_to_bank.readOp + .leakage) * + 1e3 + << " (@DVS_Level" << i + 1 << "_Vdd=" << g_ip->dvs_voltage[i] + << "); "; + } + cout << endl; + // } + + // cout << "\tTotal leakage power of a bank (mW): " << + // fr->tag_array2->power.readOp.leakage * 1e3 << endl; + // cout << "\tTotal leakage power in H-tree (that includes both " + // "address and data network) ((mW)): " << + // (fr->tag_array2->power_addr_input_htree.readOp.leakage + + // fr->tag_array2->power_data_output_htree.readOp.leakage + // + fr->tag_array2->power_routing_to_bank.readOp.leakage) + // * 1e3 << endl; + // + // cout << "\tTotal leakage power in cells (mW): " << + // (fr->tag_array2->array_leakage) * 1e3 << endl; + // cout << "\tTotal leakage power in row logic(mW): " << + // (fr->tag_array2->wl_leakage) * 1e3 << endl; + // cout << "\tTotal leakage power in column logic(mW): " << + // (fr->tag_array2->cl_leakage) * 1e3 << endl; + // cout << "\tTotal gate leakage power in H-tree (that includes + // both " "address and data network) ((mW)): " << + // (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + // + fr->tag_array2->power_data_output_htree.readOp.gate_leakage + + // 
fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) + // * 1e3 << endl; + } + + cout << endl << endl << "Area Components:" << endl << endl; + /* Data array area stats */ + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 + << endl; + else if (g_ip->pure_cam) + cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 + << endl; + else + cout << " Fully associative cache array: Area (mm2): " + << fr->data_array2->area * 1e-6 << endl; - //Wire wpr; //TODO: this must change, since this changes the wire value during dvs loop. - //Wire::print_wire();//move outside output UCA + cout << "\tHeight (mm): " << fr->data_array2->all_banks_height * 1e-3 + << endl; + cout << "\tWidth (mm): " << fr->data_array2->all_banks_width * 1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " + << fr->data_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << fr->data_array2->mat_height * 1e-3 + << endl; + cout << "\t\tMAT Length (mm): " << fr->data_array2->mat_length * 1e-3 + << endl; + cout << "\t\tSubarray Height (mm): " + << fr->data_array2->subarray_height * 1e-3 << endl; + cout << "\t\tSubarray Length (mm): " + << fr->data_array2->subarray_length * 1e-3 << endl; + if (g_ip->power_gating) { + overhead_data = + (fr->data_array2->area / fr->uca_pg_reference->data_array2->area - + 1) * + 100; //%; + cout << " Power gating circuits (sleep transistors) induced area " + "overhead: " + << overhead_data << "%" << endl; + } + } - //cout << "FO4 = " << g_tp.FO4 << endl; + /* Tag array area stats */ + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl + << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 + << endl; + cout << "\tHeight (mm): " << fr->tag_array2->all_banks_height * 1e-3 + << endl; + cout << "\tWidth (mm): " << fr->tag_array2->all_banks_width * 1e-3 + << endl; + + if 
(g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " + << fr->tag_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << fr->tag_array2->mat_height * 1e-3 + << endl; + cout << "\t\tMAT Length (mm): " << fr->tag_array2->mat_length * 1e-3 + << endl; + cout << "\t\tSubarray Height (mm): " + << fr->tag_array2->subarray_height * 1e-3 << endl; + cout << "\t\tSubarray Length (mm): " + << fr->tag_array2->subarray_length * 1e-3 << endl; + } + + if (g_ip->power_gating) { + overhead_tag = + (fr->tag_array2->area / fr->uca_pg_reference->tag_array2->area - + 1) * + 100; //%; + cout << " Power gating circuits (sleep transistors) induced area " + "overhead: " + << overhead_tag << "%" << endl + << endl; + } + } + + // Wire wpr; //TODO: this must change, since this changes the wire value + // during dvs loop. Wire::print_wire();//move outside output UCA + + // cout << "FO4 = " << g_tp.FO4 << endl; } } -//McPAT's plain interface, please keep !!! -uca_org_t cacti_interface(InputParameter * const local_interface) -{ -// g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; +// McPAT's plain interface, please keep !!! 
+uca_org_t cacti_interface(InputParameter *const local_interface) { + // g_ip = new InputParameter(); + // g_ip->add_ecc_b_ = true; uca_org_t fin_res; fin_res.valid = false; g_ip = local_interface; - -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// g_ip->is_cache=true; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } - - - if (!g_ip->error_checking()) exit(0); - + // g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + // g_ip->data_arr_peri_global_tech_type = + // data_arr_peri_global_tech_flavor_in; g_ip->tag_arr_ram_cell_tech_type = + // tag_arr_ram_cell_tech_flavor_in; g_ip->tag_arr_peri_global_tech_type = + // tag_arr_peri_global_tech_flavor_in; + // + // g_ip->ic_proj_type = interconnect_projection_type_in; + // g_ip->wire_is_mat_type = wire_inside_mat_type_in; + // g_ip->wire_os_mat_type = wire_outside_mat_type_in; + // g_ip->burst_len = BURST_LENGTH_in; + // g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + // g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + // 
+ // g_ip->cache_sz = cache_size; + // g_ip->line_sz = line_size; + // g_ip->assoc = associativity; + // g_ip->nbanks = banks; + // g_ip->out_w = output_width; + // g_ip->specific_tag = specific_tag; + // if (tag_width == 0) { + // g_ip->tag_w = 42; + // } + // else { + // g_ip->tag_w = tag_width; + // } + // + // g_ip->access_mode = access_mode; + // g_ip->delay_wt = obj_func_delay; + // g_ip->dynamic_power_wt = obj_func_dynamic_power; + // g_ip->leakage_power_wt = obj_func_leakage_power; + // g_ip->area_wt = obj_func_area; + // g_ip->cycle_time_wt = obj_func_cycle_time; + // g_ip->delay_dev = dev_func_delay; + // g_ip->dynamic_power_dev = dev_func_dynamic_power; + // g_ip->leakage_power_dev = dev_func_leakage_power; + // g_ip->area_dev = dev_func_area; + // g_ip->cycle_time_dev = dev_func_cycle_time; + // g_ip->temp = temp; + // + // g_ip->F_sz_nm = tech_node; + // g_ip->F_sz_um = tech_node / 1000; + // g_ip->is_main_mem = (main_mem != 0) ? true : false; + // g_ip->is_cache = (cache ==1) ? true : false; + // g_ip->pure_ram = (cache ==0) ? true : false; + // g_ip->pure_cam = (cache ==2) ? true : false; + // g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : + // false; g_ip->ver_htree_wires_over_array = + // VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + // g_ip->broadcast_addr_din_over_ver_htrees = + // BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + // + // g_ip->num_rw_ports = rw_ports; + // g_ip->num_rd_ports = excl_read_ports; + // g_ip->num_wr_ports = excl_write_ports; + // g_ip->num_se_rd_ports = single_ended_read_ports; + // g_ip->num_search_ports = search_ports; + // + // g_ip->print_detail = 1; + // g_ip->nuca = 0; + // g_ip->is_cache=true; + // + // if (force_wiretype == 0) + // { + // g_ip->wt = Global; + // g_ip->force_wiretype = false; + // } + // else + // { g_ip->force_wiretype = true; + // if (wiretype==10) { + // g_ip->wt = Global_10; + // } + // if (wiretype==20) { + // g_ip->wt = Global_20; + // } + // if (wiretype==30) { + // g_ip->wt = Global_30; + // } + // if (wiretype==5) { + // g_ip->wt = Global_5; + // } + // if (wiretype==0) { + // g_ip->wt = Low_swing; + // } + // } + // //g_ip->wt = Global_5; + // if (force_config == 0) + // { + // g_ip->force_cache_config = false; + // } + // else + // { + // g_ip->force_cache_config = true; + // g_ip->ndbl=ndbl; + // g_ip->ndwl=ndwl; + // g_ip->nspd=nspd; + // g_ip->ndcm=ndcm; + // g_ip->ndsam1=ndsam1; + // g_ip->ndsam2=ndsam2; + // + // + // } + // + // if (ecc==0){ + // g_ip->add_ecc_b_=false; + // } + // else + // { + // g_ip->add_ecc_b_=true; + // } + + if (!g_ip->error_checking()) + exit(0); init_tech_params(g_ip->F_sz_um, false); Wire winit; // Do not delete this line. It initializes wires. 
solve(&fin_res); - if (!g_ip->dvs_voltage.empty()) - { - update_dvs(&fin_res); + if (!g_ip->dvs_voltage.empty()) { + update_dvs(&fin_res); } - if (g_ip->power_gating) - { - update_pg(&fin_res);//this is needed for compute area overhead of power-gating, even the gated power is calculated together un-gated leakage + if (g_ip->power_gating) { + update_pg(&fin_res); // this is needed for compute area overhead of + // power-gating, even the gated power is calculated + // together un-gated leakage } -// g_ip->display_ip(); -// output_UCA(&fin_res); -// output_data_csv(fin_res); -// Wire wprint;//reset wires to original configuration as in *.cfg file (dvs level 0) -// Wire::print_wire(); - // delete (g_ip); + // g_ip->display_ip(); + // output_UCA(&fin_res); + // output_data_csv(fin_res); + // Wire wprint;//reset wires to original configuration as in *.cfg file (dvs + // level 0) Wire::print_wire(); + // delete (g_ip); return fin_res; } -//McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter* const local_interface) -{ - // g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; +// McPAT's plain interface, please keep !!! 
+uca_org_t init_interface(InputParameter *const local_interface) { + // g_ip = new InputParameter(); + // g_ip->add_ecc_b_ = true; uca_org_t fin_res; fin_res.valid = false; g_ip = local_interface; - -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } - + // g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + // g_ip->data_arr_peri_global_tech_type = + // data_arr_peri_global_tech_flavor_in; g_ip->tag_arr_ram_cell_tech_type = + // tag_arr_ram_cell_tech_flavor_in; g_ip->tag_arr_peri_global_tech_type = + // tag_arr_peri_global_tech_flavor_in; + // + // g_ip->ic_proj_type = interconnect_projection_type_in; + // g_ip->wire_is_mat_type = wire_inside_mat_type_in; + // g_ip->wire_os_mat_type = wire_outside_mat_type_in; + // g_ip->burst_len = BURST_LENGTH_in; + // g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + // g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + // + // g_ip->cache_sz = cache_size; + // g_ip->line_sz = line_size; + 
// g_ip->assoc = associativity; + // g_ip->nbanks = banks; + // g_ip->out_w = output_width; + // g_ip->specific_tag = specific_tag; + // if (tag_width == 0) { + // g_ip->tag_w = 42; + // } + // else { + // g_ip->tag_w = tag_width; + // } + // + // g_ip->access_mode = access_mode; + // g_ip->delay_wt = obj_func_delay; + // g_ip->dynamic_power_wt = obj_func_dynamic_power; + // g_ip->leakage_power_wt = obj_func_leakage_power; + // g_ip->area_wt = obj_func_area; + // g_ip->cycle_time_wt = obj_func_cycle_time; + // g_ip->delay_dev = dev_func_delay; + // g_ip->dynamic_power_dev = dev_func_dynamic_power; + // g_ip->leakage_power_dev = dev_func_leakage_power; + // g_ip->area_dev = dev_func_area; + // g_ip->cycle_time_dev = dev_func_cycle_time; + // g_ip->temp = temp; + // + // g_ip->F_sz_nm = tech_node; + // g_ip->F_sz_um = tech_node / 1000; + // g_ip->is_main_mem = (main_mem != 0) ? true : false; + // g_ip->is_cache = (cache ==1) ? true : false; + // g_ip->pure_ram = (cache ==0) ? true : false; + // g_ip->pure_cam = (cache ==2) ? true : false; + // g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : + // false; g_ip->ver_htree_wires_over_array = + // VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + // g_ip->broadcast_addr_din_over_ver_htrees = + // BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + // + // g_ip->num_rw_ports = rw_ports; + // g_ip->num_rd_ports = excl_read_ports; + // g_ip->num_wr_ports = excl_write_ports; + // g_ip->num_se_rd_ports = single_ended_read_ports; + // g_ip->num_search_ports = search_ports; + // + // g_ip->print_detail = 1; + // g_ip->nuca = 0; + // + // if (force_wiretype == 0) + // { + // g_ip->wt = Global; + // g_ip->force_wiretype = false; + // } + // else + // { g_ip->force_wiretype = true; + // if (wiretype==10) { + // g_ip->wt = Global_10; + // } + // if (wiretype==20) { + // g_ip->wt = Global_20; + // } + // if (wiretype==30) { + // g_ip->wt = Global_30; + // } + // if (wiretype==5) { + // g_ip->wt = Global_5; + // } + // if (wiretype==0) { + // g_ip->wt = Low_swing; + // } + // } + // //g_ip->wt = Global_5; + // if (force_config == 0) + // { + // g_ip->force_cache_config = false; + // } + // else + // { + // g_ip->force_cache_config = true; + // g_ip->ndbl=ndbl; + // g_ip->ndwl=ndwl; + // g_ip->nspd=nspd; + // g_ip->ndcm=ndcm; + // g_ip->ndsam1=ndsam1; + // g_ip->ndsam2=ndsam2; + // + // + // } + // + // if (ecc==0){ + // g_ip->add_ecc_b_=false; + // } + // else + // { + // g_ip->add_ecc_b_=true; + // } g_ip->error_checking(); init_tech_params(g_ip->F_sz_um, false); Wire winit; // Do not delete this line. It initializes wires. 
- //solve(&fin_res); - //g_ip->display_ip(); + // solve(&fin_res); + // g_ip->display_ip(); - //solve(&fin_res); - //output_UCA(&fin_res); - //output_data_csv(fin_res); - // delete (g_ip); + // solve(&fin_res); + // output_UCA(&fin_res); + // output_data_csv(fin_res); + // delete (g_ip); return fin_res; } -void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) -{ +void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) { // Copy the InputParameter to global interface (g_ip) and do error checking. g_ip = local_interface; g_ip->error_checking(); // Initialize technology parameters - init_tech_params(g_ip->F_sz_um,false); + init_tech_params(g_ip->F_sz_um, false); Wire winit; // Do not delete this line. It initializes wires. diff --git a/cacti/io.h b/cacti/io.h index 1bd06cf..97b9219 100644 --- a/cacti/io.h +++ b/cacti/io.h @@ -29,17 +29,13 @@ * ***************************************************************************/ - #ifndef __IO_H__ #define __IO_H__ - -#include "const.h" #include "cacti_interface.h" +#include "const.h" - -void output_data_csv(const uca_org_t & fin_res); -void output_UCA(uca_org_t * fin_res); - +void output_data_csv(const uca_org_t &fin_res); +void output_UCA(uca_org_t *fin_res); #endif diff --git a/cacti/main.cc b/cacti/main.cc index 2e40ef3..f50b19d 100644 --- a/cacti/main.cc +++ b/cacti/main.cc @@ -9,15 +9,16 @@ * of the software, derivative works or modified versions, and any portions * thereof, and both notices must appear in supporting documentation. 
* - * Users of this software agree to the terms and conditions set forth herein, and - * hereby grant back to Hewlett-Packard Company and its affiliated companies ("HP") - * a non-exclusive, unrestricted, royalty-free right and license under any changes, - * enhancements or extensions made to the core functions of the software, including - * but not limited to those affording compatibility with other hardware or software - * environments, but excluding applications which incorporate this software. - * Users further agree to use their best efforts to return to HP any such changes, - * enhancements or extensions that they make and inform HP of noteworthy uses of - * this software. Correspondence should be provided to HP at: + * Users of this software agree to the terms and conditions set forth herein, + *and hereby grant back to Hewlett-Packard Company and its affiliated companies + *("HP") a non-exclusive, unrestricted, royalty-free right and license under any + *changes, enhancements or extensions made to the core functions of the + *software, including but not limited to those affording compatibility with + *other hardware or software environments, but excluding applications which + *incorporate this software. Users further agree to use their best efforts to + *return to HP any such changes, enhancements or extensions that they make and + *inform HP of noteworthy uses of this software. 
Correspondence should be + *provided to HP at: * * Director of Intellectual Property Licensing * Office of Strategy and Technology @@ -40,161 +41,73 @@ *------------------------------------------------------------*/ #include "io.h" + #include using namespace std; - -int main(int argc,char *argv[]) -{ +int main(int argc, char *argv[]) { uca_org_t result; - if (argc != 53 && argc != 55) - { + if (argc != 53 && argc != 55) { bool infile_specified = false; string infile_name(""); - for (int32_t i = 0; i < argc; i++) - { - if (argv[i] == string("-infile")) - { + for (int32_t i = 0; i < argc; i++) { + if (argv[i] == string("-infile")) { infile_specified = true; i++; infile_name = argv[i]; } } - if (infile_specified == false) - { + if (infile_specified == false) { cerr << " Invalid arguments -- how to use CACTI:" << endl; cerr << " 1) cacti -infile " << endl; - cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" << endl; + cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" + << endl; cerr << " No. 
of arguments input - " << argc << endl; exit(1); - } - else - { + } else { result = cacti_interface(infile_name); } - } - else if (argc == 53) - { - result = cacti_interface(atoi(argv[ 1]), - atoi(argv[ 2]), - atoi(argv[ 3]), - atoi(argv[ 4]), - atoi(argv[ 5]), - atoi(argv[ 6]), - atoi(argv[ 7]), - atoi(argv[ 8]), - atoi(argv[ 9]), - atof(argv[10]), - atoi(argv[11]), - atoi(argv[12]), - atoi(argv[13]), - atoi(argv[14]), - atoi(argv[15]), - atoi(argv[16]), - atoi(argv[17]), - atoi(argv[18]), - atoi(argv[19]), - atoi(argv[20]), - atoi(argv[21]), - atoi(argv[22]), - atoi(argv[23]), - atoi(argv[24]), - atoi(argv[25]), - atoi(argv[26]), - atoi(argv[27]), - atoi(argv[28]), - atoi(argv[29]), - atoi(argv[30]), - atoi(argv[31]), - atoi(argv[32]), - atoi(argv[33]), - atoi(argv[34]), - atoi(argv[35]), - atoi(argv[36]), - atoi(argv[37]), - atoi(argv[38]), - atoi(argv[39]), - atoi(argv[40]), - atoi(argv[41]), - atoi(argv[42]), - atoi(argv[43]), - atoi(argv[44]), - atoi(argv[45]), - atoi(argv[46]), - atoi(argv[47]), - atoi(argv[48]), - atoi(argv[49]), - atoi(argv[50]), - atoi(argv[51]), - atoi(argv[52])); - } - else - { - result = cacti_interface(atoi(argv[ 1]), - atoi(argv[ 2]), - atoi(argv[ 3]), - atoi(argv[ 4]), - atoi(argv[ 5]), - atoi(argv[ 6]), - atoi(argv[ 7]), - atoi(argv[ 8]), - atof(argv[ 9]), - atoi(argv[10]), - atoi(argv[11]), - atoi(argv[12]), - atoi(argv[13]), - atoi(argv[14]), - atoi(argv[15]), - atoi(argv[16]), - atoi(argv[17]), - atoi(argv[18]), - atoi(argv[19]), - atoi(argv[20]), - atoi(argv[21]), - atoi(argv[22]), - atoi(argv[23]), - atoi(argv[24]), - atoi(argv[25]), - atoi(argv[26]), - atoi(argv[27]), - atoi(argv[28]), - atoi(argv[29]), - atoi(argv[30]), - atoi(argv[31]), - atoi(argv[32]), - atoi(argv[33]), - atoi(argv[34]), - atoi(argv[35]), - atoi(argv[36]), - atoi(argv[37]), - atoi(argv[38]), - atoi(argv[39]), - atoi(argv[40]), - atoi(argv[41]), - atoi(argv[42]), - atoi(argv[43]), - atoi(argv[44]), - atoi(argv[45]), - atoi(argv[46]), - atoi(argv[47]), - 
atoi(argv[48]), - atoi(argv[49]), - atoi(argv[50]), - atoi(argv[51]), - atoi(argv[52]), - atoi(argv[53]), - atoi(argv[54])); + } else if (argc == 53) { + result = cacti_interface( + atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), + atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), + atoi(argv[9]), atof(argv[10]), atoi(argv[11]), atoi(argv[12]), + atoi(argv[13]), atoi(argv[14]), atoi(argv[15]), atoi(argv[16]), + atoi(argv[17]), atoi(argv[18]), atoi(argv[19]), atoi(argv[20]), + atoi(argv[21]), atoi(argv[22]), atoi(argv[23]), atoi(argv[24]), + atoi(argv[25]), atoi(argv[26]), atoi(argv[27]), atoi(argv[28]), + atoi(argv[29]), atoi(argv[30]), atoi(argv[31]), atoi(argv[32]), + atoi(argv[33]), atoi(argv[34]), atoi(argv[35]), atoi(argv[36]), + atoi(argv[37]), atoi(argv[38]), atoi(argv[39]), atoi(argv[40]), + atoi(argv[41]), atoi(argv[42]), atoi(argv[43]), atoi(argv[44]), + atoi(argv[45]), atoi(argv[46]), atoi(argv[47]), atoi(argv[48]), + atoi(argv[49]), atoi(argv[50]), atoi(argv[51]), atoi(argv[52])); + } else { + result = cacti_interface( + atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), + atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), + atof(argv[9]), atoi(argv[10]), atoi(argv[11]), atoi(argv[12]), + atoi(argv[13]), atoi(argv[14]), atoi(argv[15]), atoi(argv[16]), + atoi(argv[17]), atoi(argv[18]), atoi(argv[19]), atoi(argv[20]), + atoi(argv[21]), atoi(argv[22]), atoi(argv[23]), atoi(argv[24]), + atoi(argv[25]), atoi(argv[26]), atoi(argv[27]), atoi(argv[28]), + atoi(argv[29]), atoi(argv[30]), atoi(argv[31]), atoi(argv[32]), + atoi(argv[33]), atoi(argv[34]), atoi(argv[35]), atoi(argv[36]), + atoi(argv[37]), atoi(argv[38]), atoi(argv[39]), atoi(argv[40]), + atoi(argv[41]), atoi(argv[42]), atoi(argv[43]), atoi(argv[44]), + atoi(argv[45]), atoi(argv[46]), atoi(argv[47]), atoi(argv[48]), + atoi(argv[49]), atoi(argv[50]), atoi(argv[51]), atoi(argv[52]), + atoi(argv[53]), atoi(argv[54])); } result.cleanup(); -// delete 
result.data_array2; -// if (result.tag_array2!=NULL) -// delete result.tag_array2; + // delete result.data_array2; + // if (result.tag_array2!=NULL) + // delete result.tag_array2; return 0; } - diff --git a/cacti/mat.cc b/cacti/mat.cc old mode 100755 new mode 100644 index 221369d..8012e26 --- a/cacti/mat.cc +++ b/cacti/mat.cc @@ -29,242 +29,223 @@ * ***************************************************************************/ - - #include "mat.h" -#include +#include -Mat::Mat(const DynamicParameter & dyn_p) - :dp(dyn_p), - power_subarray_out_drv(), - delay_fa_tag(0), delay_cam(0), - delay_before_decoder(0), delay_bitline(0), - delay_wl_reset(0), delay_bl_restore(0), - delay_searchline(0), delay_matchchline(0), - delay_cam_sl_restore(0), delay_cam_ml_reset(0), - delay_fa_ram_wl(0),delay_hit_miss_reset(0), - delay_hit_miss(0), - subarray(dp, dp.fully_assoc), - power_bitline(), per_bitline_read_energy(0), - deg_bl_muxing(dp.deg_bl_muxing), - num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), - delay_writeback(0), - cell(subarray.cell), cam_cell(subarray.cam_cell), - is_dram(dyn_p.is_dram), - pure_cam(dyn_p.pure_cam), - num_mats(dp.num_mats), - power_sa(), delay_sa(0), - leak_power_sense_amps_closed_page_state(0), - leak_power_sense_amps_open_page_state(0), - delay_subarray_out_drv(0), - delay_comparator(0), power_comparator(), - num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), - num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), - num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir), - array_leakage(0), - wl_leakage(0), - cl_leakage(0), - sram_sleep_tx(0) - { +Mat::Mat(const DynamicParameter &dyn_p) + : dp(dyn_p), power_subarray_out_drv(), delay_fa_tag(0), delay_cam(0), + delay_before_decoder(0), delay_bitline(0), delay_wl_reset(0), + delay_bl_restore(0), delay_searchline(0), delay_matchchline(0), + delay_cam_sl_restore(0), delay_cam_ml_reset(0), delay_fa_ram_wl(0), + delay_hit_miss_reset(0), delay_hit_miss(0), subarray(dp, dp.fully_assoc), + 
power_bitline(), per_bitline_read_energy(0), + deg_bl_muxing(dp.deg_bl_muxing), + num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), delay_writeback(0), + cell(subarray.cell), cam_cell(subarray.cam_cell), is_dram(dyn_p.is_dram), + pure_cam(dyn_p.pure_cam), num_mats(dp.num_mats), power_sa(), delay_sa(0), + leak_power_sense_amps_closed_page_state(0), + leak_power_sense_amps_open_page_state(0), delay_subarray_out_drv(0), + delay_comparator(0), power_comparator(), num_do_b_mat(dyn_p.num_do_b_mat), + num_so_b_mat(dyn_p.num_so_b_mat), + num_subarrays_per_mat(dp.num_subarrays / dp.num_mats), + num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir), array_leakage(0), + wl_leakage(0), cl_leakage(0), sram_sleep_tx(0) { assert(num_subarrays_per_mat <= 4); assert(num_subarrays_per_row <= 2); is_fa = (dp.fully_assoc) ? true : false; - camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. + camFlag = (is_fa || pure_cam); // although cam_cell.w = cell.w for fa, we + // still differentiate them. if (is_fa || pure_cam) - num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; + num_subarrays_per_row = num_subarrays_per_mat > 2 + ? 
num_subarrays_per_mat / 2 + : num_subarrays_per_mat; if (dp.use_inp_params == 1) { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { RWP = g_ip->num_rw_ports; ERP = g_ip->num_rd_ports; EWP = g_ip->num_wr_ports; SCHP = g_ip->num_search_ports; - } double number_sa_subarray; - if (!is_fa && !pure_cam) - { - number_sa_subarray = subarray.num_cols / deg_bl_muxing; - } - else if (is_fa && !pure_cam) - { - number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + if (!is_fa && !pure_cam) { + number_sa_subarray = subarray.num_cols / deg_bl_muxing; + } else if (is_fa && !pure_cam) { + number_sa_subarray = + (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; } - else - { - number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; + else { + number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; } - int num_dec_signals = subarray.num_rows; - double C_ld_bit_mux_dec_out = 0; + int num_dec_signals = subarray.num_rows; + double C_ld_bit_mux_dec_out = 0; double C_ld_sa_mux_lev_1_dec_out = 0; double C_ld_sa_mux_lev_2_dec_out = 0; double R_wire_wl_drv_out; - if (!is_fa && !pure_cam) - { - R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; - } - else if (is_fa && !pure_cam) - { - R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; - } - else - { - R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; - } + if (!is_fa && !pure_cam) { + R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + } else if (is_fa && !pure_cam) { + R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + + subarray.num_cols_fa_ram * cell.w) * + g_tp.wire_local.R_per_um; + } else { + R_wire_wl_drv_out 
= + (subarray.num_cols_fa_cam * cam_cell.w) * g_tp.wire_local.R_per_um; + } - double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA - double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; + double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.R_per_um * + cell.w; // TODO:revisit for FA + double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.R_per_um * cell.w; - if (deg_bl_muxing > 1) - { + if (deg_bl_muxing > 1) { C_ld_bit_mux_dec_out = - (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) * + gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); } - if (dp.Ndsam_lev_1 > 1) - { + if (dp.Ndsam_lev_1 > 1) { C_ld_sa_mux_lev_1_dec_out = - (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); } - if (dp.Ndsam_lev_2 > 1) - { - C_ld_sa_mux_lev_2_dec_out = - (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + if (dp.Ndsam_lev_2 > 1) { + C_ld_sa_mux_lev_2_dec_out = (num_subarrays_per_mat * number_sa_subarray / + 
(dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * + cell.get_w(); } - if (num_subarrays_per_row >= 2) - { + if (num_subarrays_per_row >= 2) { // wire heads for both right and left side of a mat, so half the resistance R_wire_bit_mux_dec_out /= 2.0; - R_wire_sa_mux_dec_out /= 2.0; + R_wire_sa_mux_dec_out /= 2.0; } - - row_dec = new Decoder( - num_dec_signals, - false, - subarray.C_wl, - R_wire_wl_drv_out, - false/*is_fa*/, - is_dram, - true, - camFlag? cam_cell:cell, - g_ip->power_gating? true:false, - subarray.num_rows); - -// row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for OOO programming -// if (is_fa && (!dp.is_tag)) -// { -// row_dec->exist = true; -// } - bit_mux_dec = new Decoder( - deg_bl_muxing,// This number is 1 for FA or CAM - false, - C_ld_bit_mux_dec_out, - R_wire_bit_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell, - g_ip->power_gating? true:false); - sa_mux_lev_1_dec = new Decoder( - dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM - dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal - C_ld_sa_mux_lev_1_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell, - g_ip->power_gating? true:false); - sa_mux_lev_2_dec = new Decoder( - dp.Ndsam_lev_2, // This number is 1 for FA or CAM - false, - C_ld_sa_mux_lev_2_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell, - g_ip->power_gating? true:false); + row_dec = + new Decoder(num_dec_signals, false, subarray.C_wl, R_wire_wl_drv_out, + false /*is_fa*/, is_dram, true, camFlag ? cam_cell : cell, + g_ip->power_gating ? 
true : false, subarray.num_rows); + + // row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for + // OOO programming if (is_fa && (!dp.is_tag)) + // { + // row_dec->exist = true; + // } + bit_mux_dec = + new Decoder(deg_bl_muxing, // This number is 1 for FA or CAM + false, C_ld_bit_mux_dec_out, R_wire_bit_mux_dec_out, + false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, + g_ip->power_gating ? true : false); + sa_mux_lev_1_dec = + new Decoder(dp.deg_senseamp_muxing_non_associativity, // This number is 1 + // for FA or CAM + dp.number_way_select_signals_mat + ? true + : false, // only sa_mux_lev_1_dec needs way select signal + C_ld_sa_mux_lev_1_dec_out, R_wire_sa_mux_dec_out, + false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, + g_ip->power_gating ? true : false); + sa_mux_lev_2_dec = + new Decoder(dp.Ndsam_lev_2, // This number is 1 for FA or CAM + false, C_ld_sa_mux_lev_2_dec_out, R_wire_sa_mux_dec_out, + false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, + g_ip->power_gating ? 
true : false); double C_wire_predec_blk_out; double R_wire_predec_blk_out; - if (!is_fa && !pure_cam) - { - - C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; - R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; - - } - else //for pre-decode block's load is same for both FA and CAM - { - C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; - R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; - } - - - if (is_fa||pure_cam) - num_dec_signals += _log2(num_subarrays_per_mat); - - PredecBlk * r_predec_blk1 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - true); - PredecBlk * r_predec_blk2 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - false); - PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); - PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); - dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); - dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); - - PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); - 
PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); - way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); - dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); - - r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); - b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); - sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); - sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); - - subarray_out_wire = new Wire(Global, (g_ip->cl_vertical?subarray.area.w:subarray.area.h),1,1,inside_mat);//should be subarray.area.w; if with /2 means average length + if (!is_fa && !pure_cam) { + + C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * + g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * + g_tp.wire_inside_mat.R_per_um * cell.h; + + } else // for pre-decode block's load is same for both FA and CAM + { + C_wire_predec_blk_out = + subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; + R_wire_predec_blk_out = + subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; + } + + if (is_fa || pure_cam) + num_dec_signals += _log2(num_subarrays_per_mat); + + PredecBlk 
*r_predec_blk1 = new PredecBlk( + num_dec_signals, row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, + num_subarrays_per_mat, is_dram, true); + PredecBlk *r_predec_blk2 = new PredecBlk( + num_dec_signals, row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, + num_subarrays_per_mat, is_dram, false); + PredecBlk *b_mux_predec_blk1 = + new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); + PredecBlk *b_mux_predec_blk2 = + new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); + PredecBlk *sa_mux_lev_1_predec_blk1 = + new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, + sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + PredecBlk *sa_mux_lev_1_predec_blk2 = + new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, + sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + PredecBlk *sa_mux_lev_2_predec_blk1 = + new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); + PredecBlk *sa_mux_lev_2_predec_blk2 = + new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); + dummy_way_sel_predec_blk1 = + new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); + dummy_way_sel_predec_blk2 = + new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); + + PredecBlkDrv *r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); + PredecBlkDrv *r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); + PredecBlkDrv *b_mux_predec_blk_drv1 = + new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); + PredecBlkDrv *b_mux_predec_blk_drv2 = + new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); + PredecBlkDrv *sa_mux_lev_1_predec_blk_drv1 = + new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); + PredecBlkDrv *sa_mux_lev_1_predec_blk_drv2 = + new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); + PredecBlkDrv *sa_mux_lev_2_predec_blk_drv1 = + new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); + PredecBlkDrv *sa_mux_lev_2_predec_blk_drv2 = + new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); + 
way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, + dummy_way_sel_predec_blk1, is_dram); + dummy_way_sel_predec_blk_drv2 = + new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); + + r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); + b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); + sa_mux_lev_1_predec = + new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); + sa_mux_lev_2_predec = + new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); + + subarray_out_wire = + new Wire(Global, (g_ip->cl_vertical ? subarray.area.w : subarray.area.h), + 1, 1, inside_mat); // should be subarray.area.w; if with /2 + // means average length double driver_c_gate_load; double driver_c_wire_load; @@ -272,208 +253,224 @@ Mat::Mat(const DynamicParameter & dyn_p) if (is_fa || pure_cam) - { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same - driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - cam_bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - if (!pure_cam) - { - //This is only used for fully asso not pure CAM - driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } + { // Although CAM and RAM use different bl pre-charge driver, assuming 
the + // precharge p size is the same + driver_c_gate_load = + (subarray.num_cols_fa_cam) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, + false, false); + driver_c_wire_load = + subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = + subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; + cam_bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, + driver_r_wire_load, is_dram); + + if (!pure_cam) { + // This is only used for fully asso not pure CAM + driver_c_gate_load = + (subarray.num_cols_fa_ram) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, + false, false); + driver_c_wire_load = + subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = + subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, + driver_r_wire_load, is_dram); + } } - else - { - driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + else { + driver_c_gate_load = + subarray.num_cols * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, + false, false); + driver_c_wire_load = + subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = + subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, + driver_r_wire_load, is_dram); } - double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); - double w_row_decoder = area_row_decoder / 
subarray.area.get_h(); + double area_row_decoder = + row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); + double w_row_decoder = area_row_decoder / subarray.area.get_h(); double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = - compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); - - /* This means the subarray drivers are along the vertical (y) direction since / subarray.area.get_w() is used; - * so the subarray_out_wire (actually the drivers) under the subarray and along the horizontal (x) direction - * So as mentioned above @ line 271 - * subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//should be subarray.area.w - * change the out_wire (driver to along y direction need carefully rethinking - * rather than just simply switch w with h ) + compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + + /* This means the subarray drivers are along the vertical (y) direction since + * / subarray.area.get_w() is used; so the subarray_out_wire (actually the + * drivers) under the subarray and along the horizontal (x) direction So as + * mentioned above @ line 271 subarray_out_wire = new Wire(g_ip->wt, + * subarray.area.h);//should be subarray.area.w change the out_wire (driver to + * along y direction need carefully rethinking rather than just simply switch + * w with h ) * */ - double h_subarray_out_drv = subarray_out_wire->area.get_area() * - (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); - + double h_subarray_out_drv = + subarray_out_wire->area.get_area() * + (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / + subarray.area.get_w(); h_subarray_out_drv *= (RWP + ERP + SCHP); - double h_comparators = 0.0; + double h_comparators = 0.0; double w_row_predecode_output_wires = 0.0; - double h_bit_mux_dec_out_wires = 0.0; + double h_bit_mux_dec_out_wires = 0.0; double h_senseamp_mux_dec_out_wires = 0.0; - if ((!is_fa)&&(dp.is_tag)) - { - //tagbits = (4 * 
num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; - h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); + if ((!is_fa) && (dp.is_tag)) { + // tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * + // dp.Ndsam_lev_2)) / num_do_b_mat; + h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, + subarray.area.get_w()); h_comparators *= (RWP + ERP); } - //power-gating circuit + // power-gating circuit bool is_footer = false; - double Isat_subarray = 2* simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, true);//only one wordline active in a subarray 2 means two inverters in an SRAM cell + double Isat_subarray = + 2 * simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, + true); // only one wordline active in a subarray + // 2 means two inverters in an SRAM cell double detalV_array, deltaV_wl, deltaV_floatingBL; double c_wakeup_array; - if (!(is_fa || pure_cam) && g_ip->power_gating) - {//for SRAM only at this moment - c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true);//1 inv - c_wakeup_array += 2*drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true) - + drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, true);//1 inv - c_wakeup_array *= subarray.num_rows;//all the SRAM cells in a bitline is connected to the sleep tx to provide Vcc_min - detalV_array = g_tp.sram_cell.Vdd-g_tp.sram_cell.Vcc_min; - - sram_sleep_tx = new Sleep_tx (g_ip->perfloss, - Isat_subarray, - is_footer, - c_wakeup_array, - detalV_array, - 1, - cell); - - subarray.area.set_h(subarray.area.h+ sram_sleep_tx->area.h); - - //TODO: add the sleep tx in the wl driver and + if (!(is_fa || pure_cam) && + g_ip->power_gating) { // for SRAM only at this moment + c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, + true); // 1 inv + c_wakeup_array += + 2 * drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, 
true) + + drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, + true); // 1 inv + c_wakeup_array *= + subarray.num_rows; // all the SRAM cells in a bitline is connected to + // the sleep tx to provide Vcc_min + detalV_array = g_tp.sram_cell.Vdd - g_tp.sram_cell.Vcc_min; + + sram_sleep_tx = new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, + c_wakeup_array, detalV_array, 1, cell); + + subarray.area.set_h(subarray.area.h + sram_sleep_tx->area.h); + + // TODO: add the sleep tx in the wl driver and } - - int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); - int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); - w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * + int branch_effort_predec_blk1_out = + (1 << r_predec_blk2->number_input_addr_bits); + int branch_effort_predec_blk2_out = + (1 << r_predec_blk1->number_input_addr_bits); + w_row_predecode_output_wires = + (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + double h_non_cell_area = + (num_subarrays_per_mat / num_subarrays_per_row) * + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_subarray_out_drv + h_comparators); - double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * - (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + - h_subarray_out_drv + h_comparators); - - double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); + double w_non_cell_area = + MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); - if (deg_bl_muxing > 1) - { - h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); + if (deg_bl_muxing > 1) { + h_bit_mux_dec_out_wires = + deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); } - if (dp.Ndsam_lev_1 > 1) - { - h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * 
g_tp.wire_inside_mat.pitch * (RWP + ERP); + if (dp.Ndsam_lev_1 > 1) { + h_senseamp_mux_dec_out_wires = + dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); } - if (dp.Ndsam_lev_2 > 1) - { - h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + if (dp.Ndsam_lev_2 > 1) { + h_senseamp_mux_dec_out_wires += + dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); } double h_addr_datain_wires; - if (!g_ip->ver_htree_wires_over_array) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + - (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - - if (is_fa || pure_cam) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit - (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + - (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; + if (!g_ip->ver_htree_wires_over_array) { + h_addr_datain_wires = + (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + + (dp.num_di_b_mat + dp.num_do_b_mat) / num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + if (is_fa || pure_cam) { + h_addr_datain_wires = + (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + // TODO: revisit + (dp.num_di_b_mat + dp.num_do_b_mat) / num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + + (dp.num_si_b_mat + dp.num_so_b_mat) / num_subarrays_per_row * + g_tp.wire_inside_mat.pitch * SCHP; } - //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + - //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); - h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + - h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + - h_addr_datain_wires + - h_bit_mux_dec_out_wires + - 
h_senseamp_mux_dec_out_wires; - + // h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + + // MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); + h_non_cell_area = + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_comparators + h_subarray_out_drv) * + (num_subarrays_per_mat / num_subarrays_per_row) + + h_addr_datain_wires + h_bit_mux_dec_out_wires + + h_senseamp_mux_dec_out_wires; } // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; - double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + - b_mux_predec_blk_drv1->area.get_area() + - sa_mux_lev_1_predec_blk_drv1->area.get_area() + - sa_mux_lev_2_predec_blk_drv1->area.get_area() + - way_sel_drv1->area.get_area() + - r_predec_blk_drv2->area.get_area() + - b_mux_predec_blk_drv2->area.get_area() + - sa_mux_lev_1_predec_blk_drv2->area.get_area() + - sa_mux_lev_2_predec_blk_drv2->area.get_area() + - r_predec_blk1->area.get_area() + - b_mux_predec_blk1->area.get_area() + - sa_mux_lev_1_predec_blk1->area.get_area() + - sa_mux_lev_2_predec_blk1->area.get_area() + - r_predec_blk2->area.get_area() + - b_mux_predec_blk2->area.get_area() + - sa_mux_lev_1_predec_blk2->area.get_area() + - sa_mux_lev_2_predec_blk2->area.get_area() + - bit_mux_dec->area.get_area() + - sa_mux_lev_1_dec->area.get_area() + - sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + double area_mat_center_circuitry = + (r_predec_blk_drv1->area.get_area() + + b_mux_predec_blk_drv1->area.get_area() + + sa_mux_lev_1_predec_blk_drv1->area.get_area() + + sa_mux_lev_2_predec_blk_drv1->area.get_area() + + way_sel_drv1->area.get_area() + r_predec_blk_drv2->area.get_area() + + b_mux_predec_blk_drv2->area.get_area() + + sa_mux_lev_1_predec_blk_drv2->area.get_area() + + sa_mux_lev_2_predec_blk_drv2->area.get_area() + + r_predec_blk1->area.get_area() + b_mux_predec_blk1->area.get_area() + + sa_mux_lev_1_predec_blk1->area.get_area() + + sa_mux_lev_2_predec_blk1->area.get_area() + + 
r_predec_blk2->area.get_area() + b_mux_predec_blk2->area.get_area() + + sa_mux_lev_1_predec_blk2->area.get_area() + + sa_mux_lev_2_predec_blk2->area.get_area() + + bit_mux_dec->area.get_area() + sa_mux_lev_1_dec->area.get_area() + + sa_mux_lev_2_dec->area.get_area()) * + (RWP + ERP + EWP); double area_efficiency_mat; -// if (!is_fa) -// { - assert(num_subarrays_per_mat/num_subarrays_per_row>0); - area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; - area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; - area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; - area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); - -// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<0); - assert(area.w>0); -// } -// else -// { -// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area; -// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; -// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; -// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); -// } - } - - + // if (!is_fa) + // { + assert(num_subarrays_per_mat / num_subarrays_per_row > 0); + area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.h + + h_non_cell_area; + area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; + area.w = (area.h * area.w + area_mat_center_circuitry) / area.h; + area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * + 100.0 / area.get_area(); + + // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"< 0); + assert(area.w > 0); + // } + // else + // { + // area.h = (num_subarrays_per_mat / num_subarrays_per_row) * + // subarray.area.get_h() + h_non_cell_area; area.w = num_subarrays_per_row + // * subarray.area.get_w() + w_non_cell_area; area.w = (area.h*area.w + + // 
area_mat_center_circuitry) / area.h; area_efficiency_mat = + // subarray.area.get_area() * num_subarrays_per_row * 100.0 / + // area.get_area(); + // } +} -Mat::~Mat() -{ +Mat::~Mat() { delete row_dec; delete bit_mux_dec; delete sa_mux_lev_1_dec; @@ -510,113 +507,108 @@ Mat::~Mat() if (!pure_cam) delete bl_precharge_eq_drv; - if (is_fa || pure_cam) - { - delete sl_precharge_eq_drv ; - delete sl_data_drv ; + if (is_fa || pure_cam) { + delete sl_precharge_eq_drv; + delete sl_data_drv; delete cam_bl_precharge_eq_drv; delete ml_precharge_drv; delete ml_to_ram_wl_drv; } - if (sram_sleep_tx !=0) - { - delete sram_sleep_tx; + if (sram_sleep_tx != 0) { + delete sram_sleep_tx; } } +double Mat::compute_delays(double inrisetime) { + int k; + double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl; + double outrisetime_search, outrisetime, row_dec_outrisetime; + // delay calculation for tags of fully associative cache + if (is_fa || pure_cam) { + // Compute search access time + outrisetime_search = compute_cam_delay(inrisetime); + if (is_fa) { + bl_precharge_eq_drv->compute_delay(0); + k = ml_to_ram_wl_drv->number_gates - 1; + rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, + 4 * cell.h, is_dram, false, true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, + 4 * cell.h, is_dram, false, true); + C_ld = ml_to_ram_wl_drv->c_gate_load + ml_to_ram_wl_drv->c_wire_load; + tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + + R_bl_precharge = + tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + r_b_metal = cam_cell.h * + g_tp.wire_local.R_per_um; // dummy rows in sram are filled in + R_bl = subarray.num_rows * r_b_metal; + C_bl = subarray.C_bl; + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * 
+ (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + outrisetime_search = compute_bitline_delay(outrisetime_search); + outrisetime_search = compute_sa_delay(outrisetime_search); + } + outrisetime_search = compute_subarray_out_drv(outrisetime_search); + subarray_out_wire->set_in_rise_time(outrisetime_search); + outrisetime_search = subarray_out_wire->signal_rise_time(); + delay_subarray_out_drv_htree = + delay_subarray_out_drv + subarray_out_wire->delay; + // TODO: this is just for compute plain read/write energy for fa and cam, + // plain read/write access timing need to be revisited. + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); -double Mat::compute_delays(double inrisetime) -{ - int k; - double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; - double outrisetime_search, outrisetime, row_dec_outrisetime; - // delay calculation for tags of fully associative cache - if (is_fa || pure_cam) - { - //Compute search access time - outrisetime_search = compute_cam_delay(inrisetime); - if (is_fa) - { - bl_precharge_eq_drv->compute_delay(0); - k = ml_to_ram_wl_drv->number_gates - 1; - rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); - C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; - tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - - R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in - R_bl = subarray.num_rows * r_b_metal; - C_bl = subarray.C_bl; - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - 
dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - - - outrisetime_search = compute_bitline_delay(outrisetime_search); - outrisetime_search = compute_sa_delay(outrisetime_search); - } - outrisetime_search = compute_subarray_out_drv(outrisetime_search); - subarray_out_wire->set_in_rise_time(outrisetime_search); - outrisetime_search = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - - - //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - if (pure_cam) - { - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - } - return outrisetime_search; - } - else - { - bl_precharge_eq_drv->compute_delay(0); - if (row_dec->exist == true) - { - int k = row_dec->num_gates - 1; - double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); - // TODO: this 4*cell.h number must be revisited - double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - double C_ld = row_dec->C_ld_dec_out; - double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - } - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - double r_b_metal = cell.h * g_tp.wire_local.R_per_um; - double R_bl = subarray.num_rows * 
r_b_metal; - double C_bl = subarray.C_bl; - - if (is_dram) - { - delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - else - { - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - } + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + if (pure_cam) { + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + } + return outrisetime_search; + } else { + bl_precharge_eq_drv->compute_delay(0); + if (row_dec->exist == true) { + int k = row_dec->num_gates - 1; + double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); + // TODO: this 4*cell.h number must be revisited + double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * cell.h, + is_dram, false, true) + + drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, + is_dram, false, true); + double C_ld = row_dec->C_ld_dec_out; + double tf = + rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + } + double R_bl_precharge = + tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + double r_b_metal = cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * r_b_metal; + double C_bl = subarray.C_bl; + + if (is_dram) { + delay_bl_restore = bl_precharge_eq_drv->delay + + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } else { + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + 
} + } outrisetime = r_predec->compute_delays(inrisetime); row_dec_outrisetime = row_dec->compute_delays(outrisetime); @@ -636,64 +628,75 @@ double Mat::compute_delays(double inrisetime) subarray_out_wire->set_in_rise_time(outrisetime); outrisetime = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + delay_subarray_out_drv_htree = + delay_subarray_out_drv + subarray_out_wire->delay; - if (dp.is_tag == true && dp.fully_assoc == false) - { + if (dp.is_tag == true && dp.fully_assoc == false) { compute_comparator_delay(0); } - if (row_dec->exist == false) - { - delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); - } + if (row_dec->exist == false) { + delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + } -// cout<<"r_predec delay=" <delay<<" row_dec delay = " <delay<delay, delay_hit_miss); + delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd; - /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ + /* TODO: peripheral-- Priority Encoder, usually this is not necessary in + * processor components*/ power_matchline.searchOp.dynamic = dynSearchEng; - //leakage in one subarray - double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? 
- double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); - double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv - - leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; - leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; + // leakage in one subarray + double Iport = + cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, + true); // TODO: how much is the idle time? just by *2? + double Iport_erp = + cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); + double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * + 2; + double Icell_comparator = + cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true) * + 2; // approx XOR with Inv + + leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; + leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; - leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; - leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports - - power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + - leak_comparator_cam_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP + - leak_power_SCHP_port_sram_cell*SCHP; -// power_matchline.searchOp.leakage += leak_comparator_cam_cell; - power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * 
g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; - //In idle states, the hit/miss txs are closed (on) therefore no Isub - power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ - // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; - - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; - - gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; - gate_leak_power_SCHP_port_sram_cell = 0; - - //cout<<"power_matchline.searchOp.leakage"<array_power_gated? g_tp.sram_cell.Vcc_min : g_tp.sram_cell.Vdd); -// leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd); -// leak_power_RD_port_sram_cell = Iport_erp * (g_ip->bitline_floating? 
g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd); - - leak_power_cc_inverters_sram_cell_gated = Icell * g_tp.sram_cell.Vcc_min; - leak_power_acc_tr_RW_or_WR_port_sram_cell_gated = Iport * g_tp.sram.Vbitfloating; - leak_power_RD_port_sram_cell_gated = Iport_erp * g_tp.sram.Vbitfloating; -// -// leak_power_cc_inverters_sram_cell_gated = leak_power_cc_inverters_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram_cell.Vcc_min; -// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = leak_power_acc_tr_RW_or_WR_port_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; -// leak_power_RD_port_sram_cell_floating = leak_power_RD_port_sram_cell_floating/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; -// - - - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true); - - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; + // Leakage current of an SRAM cell + double Iport = + cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, false, + true); // TODO: how much is the idle time? just by *2? + double Iport_erp = + cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos, false, true); + double Icell = + cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv, + false, true) * + 2; // two invs per cell + + leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd; + + // leak_power_cc_inverters_sram_cell = Icell * + // (g_ip->array_power_gated? g_tp.sram_cell.Vcc_min : + // g_tp.sram_cell.Vdd); leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport + // * (g_ip->bitline_floating? 
g_tp.sram.Vbitfloating : + // g_tp.sram_cell.Vdd); leak_power_RD_port_sram_cell = + // Iport_erp * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : + // g_tp.sram_cell.Vdd); + + leak_power_cc_inverters_sram_cell_gated = Icell * g_tp.sram_cell.Vcc_min; + leak_power_acc_tr_RW_or_WR_port_sram_cell_gated = + Iport * g_tp.sram.Vbitfloating; + leak_power_RD_port_sram_cell_gated = Iport_erp * g_tp.sram.Vbitfloating; + // + // leak_power_cc_inverters_sram_cell_gated = + // leak_power_cc_inverters_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram_cell.Vcc_min; + // leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = + // leak_power_acc_tr_RW_or_WR_port_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; + // leak_power_RD_port_sram_cell_floating = + // leak_power_RD_port_sram_cell_floating/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; + // + + // in idle state, Ig_on only possibly exist in access transistors of read + // only ports + double Ig_port_erp = + cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, false, true); + double Ig_cell = cmos_Ig_leakage( + g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv, false, true); + + gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; } - - double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram); + double C_drain_bit_mux = drain_C_( + g_tp.w_nmos_b_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w / (2 * (RWP + ERP + SCHP)), is_dram); double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); - double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_drain_sense_amp_iso = drain_C_( + g_tp.w_iso, PCH, 1, 0, + camFlag ? 
cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram); double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); - double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + - drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); - double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_sense_amp_latch = + gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, + camFlag ? cam_cell.w + : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, + camFlag ? cam_cell.w + : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram); + double C_drain_sense_amp_mux = drain_C_( + g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? 
cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), + is_dram); - if (is_dram) - { - double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl)); + if (is_dram) { + double fraction = + dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl)); tstep = 2.3 * fraction * r_dev * - (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) / - (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)); + (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)) / + (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)); delay_writeback = tstep; - dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100; - per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; - } - else - { + dynRdEnergy += + (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += + (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * + num_act_mats_hor_dir * 100; + per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; + } else { double tau; - if (deg_bl_muxing > 1) - { + if (deg_bl_muxing 
> 1) { tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* - subarray.num_cols * num_subarrays_per_mat*/; + (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * + g_tp.sram_cell.Vdd /* +subarray.num_cols * num_subarrays_per_mat*/ + ; blfloating_c += (C_bl + 2 * C_drain_bit_mux) * 2; - dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); - blfloating_c += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *2; - dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; - //Write Ops are differential for SRAM - - } - else - { + dynRdEnergy += + (2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * + (1.0 /*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); + blfloating_c += (2 * 
C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2; + dynWriteEnergy += + ((1.0 /*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / + deg_senseamp_muxing) * + num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + // Write Ops are differential for SRAM + + } else { tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - - blfloating_c += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 2; - dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; - + (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) + + R_bl * C_bl / 2 + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux); + dynRdEnergy += + (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * + g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + + blfloating_c += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2; + dynWriteEnergy += (((1.0 /*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing) / + deg_senseamp_muxing) * + num_act_mats_hor_dir * C_bl) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; } tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); -// cout<<"R_cell_pull_down ="<repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - gate_C(subarray_out_wire->repeater_size 
*(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + C_ld = dp.Ndsam_lev_2 * + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w + : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / + (RWP + ERP + SCHP), + is_dram) + + // gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + + // p_to_n_sz_r), 0.0, is_dram); + gate_C(subarray_out_wire->repeater_size * + (subarray_out_wire->wire_length / + subarray_out_wire->repeater_spacing) * + g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), + 0.0, is_dram); tf = rd * C_ld; this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += + C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += + 0; // for now, let leakage of the pass transistor be 0 power_subarray_out_drv.readOp.power_gated_leakage += 0; - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; - + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; return inrisetime; } - - -double Mat::compute_comparator_delay(double inrisetime) -{ +double Mat::compute_comparator_delay(double inrisetime) { int A = g_ip->tag_assoc; - int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already + int tagbits_ = + dp.tagbits / + 4; // Assuming there are 4 quarter comparators. input tagbits is already // a multiple of 4. 
/* First Inverter */ - double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double Ceq = + gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); - double tf = Req*Ceq; - double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); - double nextinputtime = st1del/VTHCOMPINV; - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - - //For each degree of associativity - //there are 4 such quarter comparators - double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; + double tf = Req * Ceq; + double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL); + double nextinputtime = st1del / VTHCOMPINV; + power_comparator.readOp.dynamic += + 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + + // For each degree of associativity + // there are 4 such quarter comparators + double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, + 1, inv, is_dram) * + 4 * A; + double gatelkgCurrent = + cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram) * + 4 * A; /* Second Inverter */ - Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n2, 
NCH, 1, 1, g_tp.cell_h_def, is_dram); Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); - tf = Req*Ceq; - double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); - nextinputtime = st2del/(1.0-VTHCOMPINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; + tf = Req * Ceq; + double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE); + nextinputtime = st2del / (1.0 - VTHCOMPINV); + power_comparator.readOp.dynamic += + 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * + 4 * A; + gatelkgCurrent += + cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram) * + 4 * A; /* Third Inverter */ - Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); - tf = Req*Ceq; - double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); - nextinputtime = st3del/(VTHEVALINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; + tf = Req * Ceq; + double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL); + 
nextinputtime = st3del / (VTHEVALINV); + power_comparator.readOp.dynamic += + 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, + inv, is_dram) * + 4 * A; + gatelkgCurrent += + cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram) * + 4 * A; /* Final Inverter (virtual ground driver) discharging compare part */ - double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); - double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ - double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); - double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); - power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); - lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 - - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter + double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram); + double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */ + double c2 = + (tagbits_) * + (drain_C_(g_tp.w_comp_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 
1, g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double c1 = + (tagbits_) * + (drain_C_(g_tp.w_comp_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram); + power_comparator.readOp.dynamic += + 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + power_comparator.readOp.dynamic += + c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); + lkgCurrent += + cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram) * + 4 * A; + lkgCurrent += + cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram) * 4 * + A; // stack factor of 0.2 + + gatelkgCurrent += + cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram) * + 4 * A; + gatelkgCurrent += + cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram) * 4 * + A; // for gate leakage this equals to a inverter /* time to go to threshold of mux driver */ - double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); + double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND); /* take into account non-zero input rise time */ - double m = g_tp.peri_global.Vdd/nextinputtime; + double m = g_tp.peri_global.Vdd / nextinputtime; double Tcomparatorni; - if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) - { + if ((tstep) <= (0.5 * (g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) { double a = m; - double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); + double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - 
g_tp.peri_global.Vth); + double c = + -2 * (tstep) * (g_tp.peri_global.Vdd - g_tp.peri_global.Vth) + + 1 / m * ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth); + Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a); + } else { + Tcomparatorni = (tstep) + + (g_tp.peri_global.Vdd + g_tp.peri_global.Vth) / (2 * m) - + (g_tp.peri_global.Vdd * VTHEVALINV) / m; } - else - { - Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; - } - delay_comparator = Tcomparatorni+st1del+st2del+st3del; + delay_comparator = Tcomparatorni + st1del + st2del + st3del; power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; - power_comparator.readOp.power_gated_leakage = lkgCurrent * g_tp.peri_global.Vcc_min; + power_comparator.readOp.power_gated_leakage = + lkgCurrent * g_tp.peri_global.Vcc_min; power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; - return Tcomparatorni / (1.0 - VTHMUXNAND);; + return Tcomparatorni / (1.0 - VTHMUXNAND); + ; } +void Mat::compute_power_energy() { + // for cam and FA, power.readOp is the plain read power, power.searchOp is the + // associative search related power + // when search all subarrays and all mats are fully active + // when plain read/write only one subarray in a single mat is active. - -void Mat::compute_power_energy() -{ - //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power - //when search all subarrays and all mats are fully active - //when plain read/write only one subarray in a single mat is active. - - // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. + // add energy consumed in predecoder drivers. This unit is shared by all + // subarrays in a mat. 
power.readOp.dynamic += r_predec->power.readOp.dynamic + b_mux_predec->power.readOp.dynamic + sa_mux_lev_1_predec->power.readOp.dynamic + sa_mux_lev_2_predec->power.readOp.dynamic; // add energy consumed in decoders - power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; - if (!(is_fa||pure_cam)) - power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; + power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; + if (!(is_fa || pure_cam)) + power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; // add energy consumed in bitline prechagers, SAs, and bitlines - if (!(is_fa||pure_cam)) - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; - - // add energy consumed in bitlines - //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; + if (!(is_fa || pure_cam)) { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = + bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + + // Add sense amps energy + num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat; + + // add energy consumed in bitlines + // cout<<"bitline power"<power.readOp.dynamic) * + num_do_b_mat; + + 
power.readOp.dynamic += + power_bl_precharge_eq_drv.readOp.dynamic + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; } - else if (is_fa) - { - //for plain read/write only one subarray in a mat is active - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic - + cam_bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - - //Add sense amps energy - num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; - num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; - power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; - power_sa.readOp.dynamic *= num_sa_subarray; - - - // add energy consumed in bitlines - power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; - power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; - - //Add subarray output energy - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + 
- sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - //add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - - } - else - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; - power_sa.searchOp.dynamic = 0; - - power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; - 
power_bitline.searchOp.dynamic = 0; - power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; - - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - - ////add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - + else if 
(is_fa) { + // for plain read/write only one subarray in a mat is active + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = + bl_precharge_eq_drv->power.readOp.dynamic + + cam_bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.searchOp.dynamic = + bl_precharge_eq_drv->power.readOp.dynamic; + + // Add sense amps energy + num_sa_subarray = + (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing; + power_sa.searchOp.dynamic = + power_sa.readOp.dynamic * num_sa_subarray_search; + power_sa.readOp.dynamic *= num_sa_subarray; + + // add energy consumed in bitlines + power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; + power_bitline.readOp.dynamic *= + (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram); + power_bitline.writeOp.dynamic *= + (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram); + power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; + + // Add subarray output energy + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + + subarray_out_wire->power.readOp.dynamic) * + num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + + subarray_out_wire->power.readOp.dynamic) * + num_do_b_mat; + + power.readOp.dynamic += + power_bl_precharge_eq_drv.readOp.dynamic + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + // add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = + power_searchline_precharge.readOp.dynamic * 
num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * + num_subarrays_per_mat; + ; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + // power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + + } else { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = + cam_bl_precharge_eq_drv->power.readOp.dynamic; + // power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + // power_bl_precharge_eq_drv.searchOp.dynamic = + // cam_bl_precharge_eq_drv->power.readOp.dynamic; + // power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; + + // Add sense amps energy + num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray; //*num_subarrays_per_mat; + power_sa.searchOp.dynamic = 0; + + power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.searchOp.dynamic = 0; + power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; + + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + + subarray_out_wire->power.readOp.dynamic) * + num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + + 
subarray_out_wire->power.readOp.dynamic) * + num_do_b_mat; + + power.readOp.dynamic += + power_bl_precharge_eq_drv.readOp.dynamic + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + ////add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = + power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * + num_subarrays_per_mat; + ; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + // power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; } - int number_output_drivers_subarray; -// // calculate leakage power - if (!(is_fa || pure_cam)) - { - number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - 
power_bitline.readOp.power_gated_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - //bl precharge drv is not power gated to turn off the precharge and equalization circuit (PMOS, thus turn-off signal is "1") for bitline floating - power_bl_precharge_eq_drv.readOp.power_gated_leakage = bl_precharge_eq_drv->power.readOp.power_gated_leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + // // calculate leakage power + if (!(is_fa || pure_cam)) { + number_output_drivers_subarray = + num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + power_bitline.readOp.leakage *= + subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bitline.readOp.power_gated_leakage *= + subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = + bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + // bl precharge drv is not power gated to turn off the precharge and + // equalization circuit (PMOS, thus turn-off signal is "1") for bitline + // floating + power_bl_precharge_eq_drv.readOp.power_gated_leakage = + bl_precharge_eq_drv->power.readOp.power_gated_leakage * + num_subarrays_per_mat; + power_sa.readOp.leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP); + + // num_sa_subarray = subarray.num_cols / deg_bl_muxing; power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + (power_subarray_out_drv.readOp.leakage + + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); power_subarray_out_drv.readOp.power_gated_leakage = - 
(power_subarray_out_drv.readOp.power_gated_leakage + subarray_out_wire->power.readOp.power_gated_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + (power_subarray_out_drv.readOp.power_gated_leakage + + subarray_out_wire->power.readOp.power_gated_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); power.readOp.leakage += power_bitline.readOp.leakage + power_bl_precharge_eq_drv.readOp.leakage + power_sa.readOp.leakage + power_subarray_out_drv.readOp.leakage; - power.readOp.power_gated_leakage += power_bitline.readOp.power_gated_leakage + - power_bl_precharge_eq_drv.readOp.power_gated_leakage + - power_sa.readOp.power_gated_leakage + - power_subarray_out_drv.readOp.power_gated_leakage; + power.readOp.power_gated_leakage += + power_bitline.readOp.power_gated_leakage + + power_bl_precharge_eq_drv.readOp.power_gated_leakage + + power_sa.readOp.power_gated_leakage + + power_subarray_out_drv.readOp.power_gated_leakage; power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); power.readOp.leakage += power_comparator.readOp.leakage; power_comparator.readOp.power_gated_leakage *= num_do_b_mat * (RWP + ERP); - power.readOp.power_gated_leakage += power_comparator.readOp.power_gated_leakage; + power.readOp.power_gated_leakage += + power_comparator.readOp.power_gated_leakage; array_leakage = power_bitline.readOp.leakage; cl_leakage = - power_bl_precharge_eq_drv.readOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage + - power_comparator.readOp.leakage; - - - - //Decoder blocks - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * 
dp.Ndsam_lev_2; - - power_row_decoders.readOp.power_gated_leakage = row_dec->power.readOp.power_gated_leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.power_gated_leakage = bit_mux_dec->power.readOp.power_gated_leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.power_gated_leakage = sa_mux_lev_1_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.power_gated_leakage = sa_mux_lev_2_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_2; - -// if (!g_ip->wl_power_gated) -// { - power.readOp.leakage += r_predec->power.readOp.leakage + - b_mux_predec->power.readOp.leakage + - sa_mux_lev_1_predec->power.readOp.leakage + - sa_mux_lev_2_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage + - power_bit_mux_decoders.readOp.leakage + - power_sa_mux_lev_1_decoders.readOp.leakage + - power_sa_mux_lev_2_decoders.readOp.leakage; - - power.readOp.power_gated_leakage += r_predec->power.readOp.power_gated_leakage + - b_mux_predec->power.readOp.power_gated_leakage + - sa_mux_lev_1_predec->power.readOp.power_gated_leakage + - sa_mux_lev_2_predec->power.readOp.power_gated_leakage + - power_row_decoders.readOp.power_gated_leakage + - power_bit_mux_decoders.readOp.power_gated_leakage + - power_sa_mux_lev_1_decoders.readOp.power_gated_leakage + - power_sa_mux_lev_2_decoders.readOp.power_gated_leakage; - -// } -// else -// { -// power.readOp.power_gated_leakage += (r_predec->power.readOp.leakage + -// -// b_mux_predec->power.readOp.leakage + -// sa_mux_lev_1_predec->power.readOp.leakage + -// sa_mux_lev_2_predec->power.readOp.leakage + -// power_row_decoders.readOp.leakage + -// power_bit_mux_decoders.readOp.leakage + -// power_sa_mux_lev_1_decoders.readOp.leakage + -// power_sa_mux_lev_2_decoders.readOp.leakage)/g_tp.peri_global.Vdd*g_tp.peri_global.Vcc_min; - -// } + power_bl_precharge_eq_drv.readOp.leakage + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage + 
power_comparator.readOp.leakage; + + // Decoder blocks + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * + subarray.num_rows * + num_subarrays_per_mat; + power_bit_mux_decoders.readOp.leakage = + bit_mux_dec->power.readOp.leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.leakage = + sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.leakage = + sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; + + power_row_decoders.readOp.power_gated_leakage = + row_dec->power.readOp.power_gated_leakage * subarray.num_rows * + num_subarrays_per_mat; + power_bit_mux_decoders.readOp.power_gated_leakage = + bit_mux_dec->power.readOp.power_gated_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.power_gated_leakage = + sa_mux_lev_1_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.power_gated_leakage = + sa_mux_lev_2_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_2; + + // if (!g_ip->wl_power_gated) + // { + power.readOp.leakage += r_predec->power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + + power.readOp.power_gated_leakage += + r_predec->power.readOp.power_gated_leakage + + b_mux_predec->power.readOp.power_gated_leakage + + sa_mux_lev_1_predec->power.readOp.power_gated_leakage + + sa_mux_lev_2_predec->power.readOp.power_gated_leakage + + power_row_decoders.readOp.power_gated_leakage + + power_bit_mux_decoders.readOp.power_gated_leakage + + power_sa_mux_lev_1_decoders.readOp.power_gated_leakage + + power_sa_mux_lev_2_decoders.readOp.power_gated_leakage; + + // } + // else + // { + // power.readOp.power_gated_leakage += + // (r_predec->power.readOp.leakage + + // + // 
b_mux_predec->power.readOp.leakage + + // sa_mux_lev_1_predec->power.readOp.leakage + + // sa_mux_lev_2_predec->power.readOp.leakage + + // power_row_decoders.readOp.leakage + + // power_bit_mux_decoders.readOp.leakage + + // power_sa_mux_lev_1_decoders.readOp.leakage + + // power_sa_mux_lev_2_decoders.readOp.leakage)/g_tp.peri_global.Vdd*g_tp.peri_global.Vcc_min; + + // } wl_leakage = r_predec->power.readOp.leakage + - b_mux_predec->power.readOp.leakage + - sa_mux_lev_1_predec->power.readOp.leakage + - sa_mux_lev_2_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage + - power_bit_mux_decoders.readOp.leakage + - power_sa_mux_lev_1_decoders.readOp.leakage + - power_sa_mux_lev_2_decoders.readOp.leakage; + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; //++++Below is gate leakage - power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_bitline.readOp.gate_leakage *= + subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.gate_leakage = + bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP); + + // num_sa_subarray = subarray.num_cols / deg_bl_muxing; power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * 
num_subarrays_per_mat * (RWP + ERP); + (power_subarray_out_drv.readOp.gate_leakage + + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - //cout<<"leakage"<power_gating) - { - - //cout<<"leakage1"<area.get_area()*subarray.num_cols * num_subarrays_per_mat*dp.num_mats; - array_wakeup_e.readOp.dynamic = sram_sleep_tx->wakeup_power.readOp.dynamic * num_subarrays_per_mat*subarray.num_cols*dp.num_act_mats_hor_dir; - array_wakeup_t = sram_sleep_tx->wakeup_delay; - - wl_sleep_tx_area = (row_dec->exist ? row_dec->sleeptx->area.get_area() : 0)*subarray.num_rows * num_subarrays_per_mat*dp.num_mats - + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->area.get_area() : 0)*dp.num_mats - + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->area.get_area() : 0)*dp.num_mats - + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->area.get_area() : 0)*dp.num_mats; - wl_wakeup_e.readOp.dynamic = (row_dec->exist ? row_dec->sleeptx->wakeup_power.readOp.dynamic :0) * num_subarrays_per_mat*subarray.num_rows*dp.num_act_mats_hor_dir - + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats - + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats - + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats; - wl_wakeup_t = (row_dec->exist ? row_dec->sleeptx->wakeup_delay : 0) - + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->wakeup_delay : 0)*dp.num_mats - + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->wakeup_delay : 0)*dp.num_mats - + (sa_mux_lev_2_dec->exist ? 
sa_mux_lev_2_dec->sleeptx->wakeup_delay : 0)*dp.num_mats;; + if (g_ip->power_gating) { + + // cout<<"leakage1"<area.get_area() * subarray.num_cols * + num_subarrays_per_mat * dp.num_mats; + array_wakeup_e.readOp.dynamic = + sram_sleep_tx->wakeup_power.readOp.dynamic * num_subarrays_per_mat * + subarray.num_cols * dp.num_act_mats_hor_dir; + array_wakeup_t = sram_sleep_tx->wakeup_delay; + + wl_sleep_tx_area = + (row_dec->exist ? row_dec->sleeptx->area.get_area() : 0) * + subarray.num_rows * num_subarrays_per_mat * dp.num_mats + + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->area.get_area() : 0) * + dp.num_mats + + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->area.get_area() + : 0) * + dp.num_mats + + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->area.get_area() + : 0) * + dp.num_mats; + wl_wakeup_e.readOp.dynamic = + (row_dec->exist ? row_dec->sleeptx->wakeup_power.readOp.dynamic : 0) * + num_subarrays_per_mat * subarray.num_rows * + dp.num_act_mats_hor_dir + + (bit_mux_dec->exist + ? bit_mux_dec->sleeptx->wakeup_power.readOp.dynamic + : 0) * + dp.num_mats + + (sa_mux_lev_1_dec->exist + ? sa_mux_lev_1_dec->sleeptx->wakeup_power.readOp.dynamic + : 0) * + dp.num_mats + + (sa_mux_lev_2_dec->exist + ? sa_mux_lev_2_dec->sleeptx->wakeup_power.readOp.dynamic + : 0) * + dp.num_mats; + wl_wakeup_t = + (row_dec->exist ? row_dec->sleeptx->wakeup_delay : 0) + + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->wakeup_delay : 0) * + dp.num_mats + + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->wakeup_delay + : 0) * + dp.num_mats + + (sa_mux_lev_2_dec->exist ? 
sa_mux_lev_2_dec->sleeptx->wakeup_delay + : 0) * + dp.num_mats; + ; } // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; - - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - b_mux_predec->power.readOp.gate_leakage + - sa_mux_lev_1_predec->power.readOp.gate_leakage + - sa_mux_lev_2_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage + - power_bit_mux_decoders.readOp.gate_leakage + - power_sa_mux_lev_1_decoders.readOp.gate_leakage + - power_sa_mux_lev_2_decoders.readOp.gate_leakage; - } - else if (is_fa) //fully assoc + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat; + power_bit_mux_decoders.readOp.gate_leakage = + bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.gate_leakage = + sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.gate_leakage = + sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; + + power.readOp.gate_leakage += + r_predec->power.readOp.gate_leakage + + b_mux_predec->power.readOp.gate_leakage + + sa_mux_lev_1_predec->power.readOp.gate_leakage + + sa_mux_lev_2_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage + + power_bit_mux_decoders.readOp.gate_leakage + + power_sa_mux_lev_1_decoders.readOp.gate_leakage + + power_sa_mux_lev_2_decoders.readOp.gate_leakage; + } else if (is_fa) // fully assoc { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); 
+ int number_output_drivers_subarray = + num_sa_subarray; // / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + power_bitline.readOp.leakage *= + subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = + bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = + cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - //cout<<"leakage3"<power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); + + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + // cout<<"leakage4"<power.readOp.leakage * + subarray.num_rows * + num_subarrays_per_mat; + power.readOp.leakage += + r_predec->power.readOp.leakage + power_row_decoders.readOp.leakage; - //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; + // inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + 
sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; - //cout<<"leakage5"<power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + // cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.gate_leakage = + cam_bl_precharge_eq_drv->power.readOp.gate_leakage * + num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); -// cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); - //cout<<"leakage3"<power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + // gate_leakage power + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat; + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - 
power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; + // cout<<"leakage5"<power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - //cout<<"leakage5"<power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = + cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); + + power.readOp.leakage += // power_bitline.readOp.leakage + + // power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + // leakage power + power_row_decoders.readOp.leakage = + 
row_dec->power.readOp.leakage * subarray.num_rows * + num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.leakage += + r_predec->power.readOp.leakage + power_row_decoders.readOp.leakage; + + // inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + + power.readOp.leakage += power_cam_all_active.searchOp.leakage; + + //+++Below is gate leakage + power_bl_precharge_eq_drv.searchOp.gate_leakage = + cam_bl_precharge_eq_drv->power.readOp.gate_leakage * + num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= + num_sa_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); + + power.readOp + .gate_leakage += // power_bitline.readOp.gate_leakage + + // power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; - } - else //pure CAM - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - 
power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.leakage += //power_bitline.readOp.leakage + - //power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - - // leakage power - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; - - //inside cam - power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; - - power.readOp.leakage += power_cam_all_active.searchOp.leakage; - - //+++Below is gate leakage - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + - //power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - - 
// gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; - - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; - - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + // gate_leakage power + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + // inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; } } - diff --git a/cacti/mat.h b/cacti/mat.h old mode 100755 new mode 100644 index b8465e4..aba17d7 --- a/cacti/mat.h +++ b/cacti/mat.h @@ -29,144 +29,141 @@ * ***************************************************************************/ - - #ifndef __MAT_H__ #define __MAT_H__ #include "component.h" #include "decoder.h" -#include "wire.h" 
-#include "subarray.h" #include "powergating.h" +#include "subarray.h" +#include "wire.h" -class Mat : public Component -{ - public: - Mat(const DynamicParameter & dyn_p); - ~Mat(); - double compute_delays(double inrisetime); // return outrisetime - void compute_power_energy(); - - const DynamicParameter & dp; - - // TODO: clean up pointers and powerDefs below - Decoder * row_dec; - Decoder * bit_mux_dec; - Decoder * sa_mux_lev_1_dec; - Decoder * sa_mux_lev_2_dec; - PredecBlk * dummy_way_sel_predec_blk1; - PredecBlk * dummy_way_sel_predec_blk2; - PredecBlkDrv * way_sel_drv1; - PredecBlkDrv * dummy_way_sel_predec_blk_drv2; - - Predec * r_predec; - Predec * b_mux_predec; - Predec * sa_mux_lev_1_predec; - Predec * sa_mux_lev_2_predec; - - Wire * subarray_out_wire; - Driver * bl_precharge_eq_drv; - Driver * cam_bl_precharge_eq_drv;//bitline pre-charge circuit is separated for CAM and RAM arrays. - Driver * ml_precharge_drv;//matchline prechange driver - Driver * sl_precharge_eq_drv;//searchline prechage driver - Driver * sl_data_drv;//search line data driver - Driver * ml_to_ram_wl_drv;//search line data driver - - - powerDef power_row_decoders; - powerDef power_bit_mux_decoders; - powerDef power_sa_mux_lev_1_decoders; - powerDef power_sa_mux_lev_2_decoders; - powerDef power_fa_cam; // TODO: leakage power is not computed yet - powerDef power_bl_precharge_eq_drv; - powerDef power_subarray_out_drv; - powerDef power_cam_all_active; - powerDef power_searchline_precharge; - powerDef power_matchline_precharge; - powerDef power_ml_to_ram_wl_drv; - - double delay_fa_tag, delay_cam; - double delay_before_decoder; - double delay_bitline; - double delay_wl_reset; - double delay_bl_restore; - - double delay_searchline; - double delay_matchchline; - double delay_cam_sl_restore; - double delay_cam_ml_reset; - double delay_fa_ram_wl; - - double delay_hit_miss_reset; - double delay_hit_miss; - - Subarray subarray; - powerDef power_bitline, power_searchline, power_matchline, 
power_bitline_gated; - double per_bitline_read_energy; - int deg_bl_muxing; - int num_act_mats_hor_dir; - double delay_writeback; - Area cell,cam_cell; - bool is_dram,is_fa, pure_cam, camFlag; - int num_mats; - powerDef power_sa; - double delay_sa; - double leak_power_sense_amps_closed_page_state; - double leak_power_sense_amps_open_page_state; - double delay_subarray_out_drv; - double delay_subarray_out_drv_htree; - double delay_comparator; - powerDef power_comparator; - int num_do_b_mat; - int num_so_b_mat; - int num_sa_subarray; - int num_sa_subarray_search; - double C_bl; - - uint32_t num_subarrays_per_mat; // the number of subarrays in a mat - uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat - - double array_leakage; - double wl_leakage; - double cl_leakage; - - Sleep_tx * sram_sleep_tx; - Sleep_tx * wl_sleep_tx; - Sleep_tx * cl_sleep_tx; - - powerDef array_wakeup_e; - double array_wakeup_t; - double array_sleep_tx_area; - - powerDef blfloating_wakeup_e; - double blfloating_wakeup_t; - double blfloating_sleep_tx_area; - - powerDef wl_wakeup_e; - double wl_wakeup_t; - double wl_sleep_tx_area; - - powerDef cl_wakeup_e; - double cl_wakeup_t; - double cl_sleep_tx_area; - - private: - double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); - double width_write_driver_or_write_mux(); - double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w); - double compute_cam_delay(double inrisetime); - double compute_bitline_delay(double inrisetime); - double compute_sa_delay(double inrisetime); - double compute_subarray_out_drv(double inrisetime); - double compute_comparator_delay(double inrisetime); - - int RWP; - int ERP; - int EWP; - int SCHP; +class Mat : public Component { +public: + Mat(const DynamicParameter &dyn_p); + ~Mat(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_energy(); + + const DynamicParameter &dp; + + // TODO: clean up pointers and 
powerDefs below + Decoder *row_dec; + Decoder *bit_mux_dec; + Decoder *sa_mux_lev_1_dec; + Decoder *sa_mux_lev_2_dec; + PredecBlk *dummy_way_sel_predec_blk1; + PredecBlk *dummy_way_sel_predec_blk2; + PredecBlkDrv *way_sel_drv1; + PredecBlkDrv *dummy_way_sel_predec_blk_drv2; + + Predec *r_predec; + Predec *b_mux_predec; + Predec *sa_mux_lev_1_predec; + Predec *sa_mux_lev_2_predec; + + Wire *subarray_out_wire; + Driver *bl_precharge_eq_drv; + Driver *cam_bl_precharge_eq_drv; // bitline pre-charge circuit is separated + // for CAM and RAM arrays. + Driver *ml_precharge_drv; // matchline prechange driver + Driver *sl_precharge_eq_drv; // searchline prechage driver + Driver *sl_data_drv; // search line data driver + Driver *ml_to_ram_wl_drv; // search line data driver + + powerDef power_row_decoders; + powerDef power_bit_mux_decoders; + powerDef power_sa_mux_lev_1_decoders; + powerDef power_sa_mux_lev_2_decoders; + powerDef power_fa_cam; // TODO: leakage power is not computed yet + powerDef power_bl_precharge_eq_drv; + powerDef power_subarray_out_drv; + powerDef power_cam_all_active; + powerDef power_searchline_precharge; + powerDef power_matchline_precharge; + powerDef power_ml_to_ram_wl_drv; + + double delay_fa_tag, delay_cam; + double delay_before_decoder; + double delay_bitline; + double delay_wl_reset; + double delay_bl_restore; + + double delay_searchline; + double delay_matchchline; + double delay_cam_sl_restore; + double delay_cam_ml_reset; + double delay_fa_ram_wl; + + double delay_hit_miss_reset; + double delay_hit_miss; + + Subarray subarray; + powerDef power_bitline, power_searchline, power_matchline, + power_bitline_gated; + double per_bitline_read_energy; + int deg_bl_muxing; + int num_act_mats_hor_dir; + double delay_writeback; + Area cell, cam_cell; + bool is_dram, is_fa, pure_cam, camFlag; + int num_mats; + powerDef power_sa; + double delay_sa; + double leak_power_sense_amps_closed_page_state; + double leak_power_sense_amps_open_page_state; + double 
delay_subarray_out_drv; + double delay_subarray_out_drv_htree; + double delay_comparator; + powerDef power_comparator; + int num_do_b_mat; + int num_so_b_mat; + int num_sa_subarray; + int num_sa_subarray_search; + double C_bl; + + uint32_t num_subarrays_per_mat; // the number of subarrays in a mat + uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat + + double array_leakage; + double wl_leakage; + double cl_leakage; + + Sleep_tx *sram_sleep_tx; + Sleep_tx *wl_sleep_tx; + Sleep_tx *cl_sleep_tx; + + powerDef array_wakeup_e; + double array_wakeup_t; + double array_sleep_tx_area; + + powerDef blfloating_wakeup_e; + double blfloating_wakeup_t; + double blfloating_sleep_tx_area; + + powerDef wl_wakeup_e; + double wl_wakeup_t; + double wl_sleep_tx_area; + + powerDef cl_wakeup_e; + double cl_wakeup_t; + double cl_sleep_tx_area; + +private: + double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + double width_write_driver_or_write_mux(); + double compute_comparators_height(int tagbits, int number_ways_in_mat, + double subarray_mem_cell_area_w); + double compute_cam_delay(double inrisetime); + double compute_bitline_delay(double inrisetime); + double compute_sa_delay(double inrisetime); + double compute_subarray_out_drv(double inrisetime); + double compute_comparator_delay(double inrisetime); + + int RWP; + int ERP; + int EWP; + int SCHP; }; - - #endif diff --git a/cacti/nuca.cc b/cacti/nuca.cc index 230025c..d9a0bee 100644 --- a/cacti/nuca.cc +++ b/cacti/nuca.cc @@ -29,29 +29,27 @@ * ***************************************************************************/ - - #include "nuca.h" + #include "Ucache.h" + #include -unsigned int MIN_BANKSIZE=65536; -#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */ +unsigned int MIN_BANKSIZE = 65536; +#define FIXED_OVERHEAD \ + 55e-12 /* clock skew and jitter in s. 
Ref: Hrishikesh et al ISCA 01 */ #define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ #define CONTR_2_BANK_LAT 0 -int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */]; +int cont_stats[2 /*l2 or l3*/][5 /* cores */][ROUTER_TYPES][7 /*banks*/] + [8 /* cycle time */]; - Nuca::Nuca( - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ):deviceType(dt) -{ +Nuca::Nuca(TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)) + : deviceType(dt) { init_cont(); } -void -Nuca::init_cont() -{ +void Nuca::init_cont() { FILE *cont; char line[5000]; char jk[5000]; @@ -61,14 +59,16 @@ Nuca::init_cont() exit(0); } - for(int i=0; i<2; i++) { - for(int j=2; j<5; j++) { - for(int k=0; knuca_cache_sz = g_ip->cache_sz; nuca_list.push_back(new nuca_org_t()); - if (g_ip->cache_level == 0) l2_c = 1; - else l2_c = 0; - - if (g_ip->cores <= 4) core_in = 2; - else if (g_ip->cores <= 8) core_in = 3; - else if (g_ip->cores <= 16) core_in = 4; - else {cout << "Number of cores should be <= 16!\n"; exit(0);} - + if (g_ip->cache_level == 0) + l2_c = 1; + else + l2_c = 0; + + if (g_ip->cores <= 4) + core_in = 2; + else if (g_ip->cores <= 8) + core_in = 3; + else if (g_ip->cores <= 16) + core_in = 4; + else { + cout << "Number of cores should be <= 16!\n"; + exit(0); + } - // set the lower bound to an appropriate value. this depends on cache associativity + // set the lower bound to an appropriate value. 
this depends on cache + // associativity if (g_ip->assoc > 2) { i = 2; while (i != g_ip->assoc) { @@ -207,20 +204,17 @@ Nuca::sim_nuca() } } - iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE); + iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE); - if (g_ip->force_wiretype) - { + if (g_ip->force_wiretype) { if (g_ip->wt == Low_swing) { wt_min = Low_swing; wt_max = Low_swing; - } - else { + } else { wt_min = Global; - wt_max = Low_swing-1; + wt_max = Low_swing - 1; } - } - else { + } else { wt_min = Global; wt_max = Low_swing; } @@ -228,53 +222,57 @@ Nuca::sim_nuca() if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { - fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n"); + fprintf( + stderr, + "Incorrect bank count value! Please fix the value in cache.cfg\n"); } bank_start = (int)logtwo((double)g_ip->nuca_bank_count); - iterations = bank_start+1; - g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count; + iterations = bank_start + 1; + g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count; } cout << "Simulating various NUCA configurations\n"; - for (it=bank_start; itnuca_cache_sz/g_ip->cache_sz; - cout << "====" << g_ip->cache_sz << "\n"; + // output_UCA(&ures); + bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz; + cout << "====" << g_ip->cache_sz << "\n"; - for (wr=wt_min; wr<=wt_max; wr++) { + for (wr = wt_min; wr <= wt_max; wr++) { - for (ro=0; roflit_size; //initialize router + for (ro = 0; ro < ROUTER_TYPES; ro++) { + flit_width = (int)router_s[ro]->flit_size; // initialize router nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; /* calculate router and wire parameters */ - double vlength = ures.cache_ht; /* length of the wire (u)*/ + double vlength = ures.cache_ht; /* length of the wire (u)*/ double hlength = ures.cache_len; // u /* find delay, area, and power 
for wires */ - wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); - wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); + wire_vertical[wr] = new Wire((enum Wire_type)wr, vlength); + wire_horizontal[wr] = new Wire((enum Wire_type)wr, hlength); - - hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); - ver_hop_lat = calc_cycles(wire_vertical[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + hor_hop_lat = + calc_cycles(wire_horizontal[wr]->delay, + 1 / (nuca_list.back()->nuca_pda.cycle_time * .001)); + ver_hop_lat = + calc_cycles(wire_vertical[wr]->delay, + 1 / (nuca_list.back()->nuca_pda.cycle_time * .001)); /* * assume a grid like topology and explore for optimal network * configuration using different row and column count values. */ - for (c=1; c<=(unsigned int)bank_count; c++) { - while (bank_count%c != 0) c++; - r = bank_count/c; + for (c = 1; c <= (unsigned int)bank_count; c++) { + while (bank_count % c != 0) + c++; + r = bank_count / c; /* * to find the avg access latency of a NUCA cache, uncontended @@ -286,8 +284,8 @@ Nuca::sim_nuca() */ totno_hops = totno_hhops = totno_vhops = tot_lat = 0; k = 1; - for (i=0; idelay*avg_hop) + - calc_cycles(ures.access_time, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + curr_acclat = + 2 * avg_lat + 2 * (router_s[ro]->delay * avg_hop) + + calc_cycles(ures.access_time, + 1 / (nuca_list.back()->nuca_pda.cycle_time * .001)); /* avg access lat of nuca */ - avg_dyn_power = - avg_hop * - (router_s[ro]->power.readOp.dynamic) + avg_hhop * - (wire_horizontal[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 64) + avg_vhop * - (wire_vertical[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic; + avg_dyn_power = avg_hop * (router_s[ro]->power.readOp.dynamic) + + avg_hhop * + (wire_horizontal[wr]->power.readOp.dynamic) * + (g_ip->block_sz * 8 + 64) + + avg_vhop * (wire_vertical[wr]->power.readOp.dynamic) * + 
(g_ip->block_sz * 8 + 64) + + ures.power.readOp.dynamic; avg_leakage_power = - bank_count * router_s[ro]->power.readOp.leakage + - avg_hhop * (wire_horizontal[wr]->power.readOp.leakage* - wire_horizontal[wr]->delay) * flit_width + - avg_vhop * (wire_vertical[wr]->power.readOp.leakage * - wire_horizontal[wr]->delay); + bank_count * router_s[ro]->power.readOp.leakage + + avg_hhop * + (wire_horizontal[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay) * + flit_width + + avg_vhop * (wire_vertical[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay); if (curr_acclat < opt_acclat) { opt_acclat = curr_acclat; @@ -346,9 +347,9 @@ Nuca::sim_nuca() totno_vhops = 0; } nuca_list.back()->wire_pda.power.readOp.dynamic = - opt_avg_hop * flit_width * - (wire_horizontal[wr]->power.readOp.dynamic + - wire_vertical[wr]->power.readOp.dynamic); + opt_avg_hop * flit_width * + (wire_horizontal[wr]->power.readOp.dynamic + + wire_vertical[wr]->power.readOp.dynamic); nuca_list.back()->avg_hops = opt_avg_hop; /* network delay/power */ nuca_list.back()->h_wire = wire_horizontal[wr]; @@ -362,22 +363,24 @@ Nuca::sim_nuca() nuca_list.back()->bank_pda.area.w = ures.cache_len; nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; - num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/)); - if(num_cyc%2 != 0) num_cyc++; - if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles + num_cyc = calc_cycles( + nuca_list.back()->bank_pda.delay /*s*/, + 1 / (nuca_list.back()->nuca_pda.cycle_time * .001 /*GHz*/)); + if (num_cyc % 2 != 0) + num_cyc++; + if (num_cyc > 16) + num_cyc = 16; // we have data only up to 16 cycles if (it < 7) { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + nuca_list.back()->nuca_pda.delay = + opt_acclat + cont_stats[l2_c][core_in][ro][it][num_cyc / 2 - 1]; nuca_list.back()->contention = - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; - } 
- else { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + cont_stats[l2_c][core_in][ro][it][num_cyc / 2 - 1]; + } else { + nuca_list.back()->nuca_pda.delay = + opt_acclat + cont_stats[l2_c][core_in][ro][7][num_cyc / 2 - 1]; nuca_list.back()->contention = - cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + cont_stats[l2_c][core_in][ro][7][num_cyc / 2 - 1]; } nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; @@ -386,32 +389,29 @@ Nuca::sim_nuca() nuca_list.back()->bank_count = bank_count; nuca_list.back()->rows = opt_rows; nuca_list.back()->columns = opt_columns; - calculate_nuca_area (nuca_list.back()); + calculate_nuca_area(nuca_list.back()); minval.update_min_values(nuca_list.back()); nuca_list.push_back(new nuca_org_t()); opt_acclat = BIGNUM; - } } g_ip->cache_sz /= 2; } - delete(nuca_list.back()); + delete (nuca_list.back()); nuca_list.pop_back(); opt_n = find_optimal_nuca(&nuca_list, &minval); print_nuca(opt_n); - g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count; + g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count; list::iterator niter; - for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) - { + for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) { delete *niter; } nuca_list.clear(); - for(int i=0; i < ROUTER_TYPES; i++) - { + for (int i = 0; i < ROUTER_TYPES; i++) { delete router_s[i]; } g_ip->display_ip(); @@ -422,74 +422,58 @@ Nuca::sim_nuca() // g_ip->ndcm = 1; // g_ip->ndsam1 = 8; // g_ip->ndsam2 = 32; - } - - void -Nuca::print_nuca (nuca_org_t *fr) -{ +void Nuca::print_nuca(nuca_org_t *fr) { printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " - "----------\n\n"); + "----------\n\n"); printf("Optimal number of banks - %d\n", fr->bank_count); - printf("Grid organization rows x columns - %d x %d\n", - fr->rows, fr->columns); - printf("Network frequency - %g GHz\n", - 
(1/fr->nuca_pda.cycle_time)*1e3); - printf("Cache dimension (mm x mm) - %g x %g\n", - fr->nuca_pda.area.h*1e-3, - fr->nuca_pda.area.w*1e-3); + printf("Grid organization rows x columns - %d x %d\n", fr->rows, fr->columns); + printf("Network frequency - %g GHz\n", (1 / fr->nuca_pda.cycle_time) * 1e3); + printf("Cache dimension (mm x mm) - %g x %g\n", fr->nuca_pda.area.h * 1e-3, + fr->nuca_pda.area.w * 1e-3); fr->router->print_router(); printf("\n\nWire stats:\n"); if (fr->h_wire->wt == Global) { printf("\tWire type - Full swing global wires with least " - "possible delay\n"); - } - else if (fr->h_wire->wt == Global_5) { + "possible delay\n"); + } else if (fr->h_wire->wt == Global_5) { printf("\tWire type - Full swing global wires with " - "5%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_10) { + "5%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_10) { printf("\tWire type - Full swing global wires with " - "10%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_20) { + "10%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_20) { printf("\tWire type - Full swing global wires with " - "20%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_30) { + "20%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_30) { printf("\tWire type - Full swing global wires with " - "30%% delay penalty\n"); - } - else if(fr->h_wire->wt == Low_swing) { + "30%% delay penalty\n"); + } else if (fr->h_wire->wt == Low_swing) { printf("\tWire type - Low swing wires\n"); } - printf("\tHorizontal link delay - %g (ns)\n", - fr->h_wire->delay*1e9); - printf("\tVertical link delay - %g (ns)\n", - fr->v_wire->delay*1e9); + printf("\tHorizontal link delay - %g (ns)\n", fr->h_wire->delay * 1e9); + printf("\tVertical link delay - %g (ns)\n", fr->v_wire->delay * 1e9); printf("\tDelay/length - %g (ns/mm)\n", - fr->h_wire->delay*1e9/fr->bank_pda.area.w); + fr->h_wire->delay * 1e9 / fr->bank_pda.area.w); printf("\tHorizontal link energy -dynamic/access 
%g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->h_wire->power.readOp.dynamic*1e9, - fr->h_wire->power.readOp.leakage*1e9); + "\t -leakage %g (nW)\n\n", + fr->h_wire->power.readOp.dynamic * 1e9, + fr->h_wire->power.readOp.leakage * 1e9); printf("\tVertical link energy -dynamic/access %g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->v_wire->power.readOp.dynamic*1e9, - fr->v_wire->power.readOp.leakage*1e9); + "\t -leakage %g (nW)\n\n", + fr->v_wire->power.readOp.dynamic * 1e9, + fr->v_wire->power.readOp.leakage * 1e9); printf("\n\n"); fr->v_wire->print_wire(); printf("\n\nBank stats:\n"); } - - nuca_org_t * -Nuca::find_optimal_nuca (list *n, min_values_t *minval) -{ +nuca_org_t *Nuca::find_optimal_nuca(list *n, + min_values_t *minval) { double cost = 0; double min_cost = BIGNUM; nuca_org_t *res = NULL; @@ -503,109 +487,102 @@ Nuca::find_optimal_nuca (list *n, min_values_t *minval) list::iterator niter; - for (niter = n->begin(); niter != n->end(); niter++) { fprintf(stderr, "\n-----------------------------" - "---------------\n"); - + "---------------\n"); printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " - "bank_dpower = %g \tleak = %g \tcycle = %g\n", - (*niter)->bank_count, - (*niter)->nuca_pda.delay, - (*niter)->nuca_pda.power.readOp.dynamic, - (*niter)->h_wire->wt, - (*niter)->bank_pda.power.readOp.dynamic, - (*niter)->nuca_pda.power.readOp.leakage, - (*niter)->nuca_pda.cycle_time); - + "bank_dpower = %g \tleak = %g \tcycle = %g\n", + (*niter)->bank_count, (*niter)->nuca_pda.delay, + (*niter)->nuca_pda.power.readOp.dynamic, (*niter)->h_wire->wt, + (*niter)->bank_pda.power.readOp.dynamic, + (*niter)->nuca_pda.power.readOp.leakage, + (*niter)->nuca_pda.cycle_time); if (g_ip->ed == 1) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); if (min_cost > cost) { 
min_cost = cost; res = ((*niter)); } - } - else if (g_ip->ed == 2) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); + } else if (g_ip->ed == 2) { + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); if (min_cost > cost) { min_cost = cost; res = ((*niter)); } - } - else { + } else { /* * check whether the current organization * meets the input deviation constraints */ v = check_nuca_org((*niter), minval); - if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + if (minval->min_leakage == 0) + minval->min_leakage = 0.1; // FIXME remove this after leakage modeling if (v) { - cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) + - c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) + - dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) + - lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) + - a * ((*niter)->nuca_pda.area.get_area()/minval->min_area)); + cost = + (d * ((*niter)->nuca_pda.delay / minval->min_delay) + + c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) + + dp * ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn) + + lp * ((*niter)->nuca_pda.power.readOp.leakage / + minval->min_leakage) + + a * ((*niter)->nuca_pda.area.get_area() / minval->min_area)); fprintf(stderr, "cost = %g\n", cost); if (min_cost > cost) { min_cost = cost; res = ((*niter)); } - } - else { + } else { niter = n->erase(niter); - if (niter !=n->begin()) - niter --; + if (niter != n->begin()) + niter--; } } } return res; } - int -Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) -{ - if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) { +int Nuca::check_nuca_org(nuca_org_t *n, min_values_t *minval) { + if 
(((n->nuca_pda.delay - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev_nuca) { return 0; } - if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn) * + 100 > g_ip->dynamic_power_dev_nuca) { return 0; } - if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > + if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage) * + 100 > g_ip->leakage_power_dev_nuca) { return 0; } - if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 > g_ip->cycle_time_dev_nuca) { return 0; } - if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 > + if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) * + 100 > g_ip->area_dev_nuca) { return 0; } return 1; } - void -Nuca::calculate_nuca_area (nuca_org_t *nuca) -{ - nuca->nuca_pda.area.h= - nuca->rows * ((nuca->h_wire->wire_width + - nuca->h_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.h); +void Nuca::calculate_nuca_area(nuca_org_t *nuca) { + nuca->nuca_pda.area.h = + nuca->rows * ((nuca->h_wire->wire_width + nuca->h_wire->wire_spacing) * + nuca->router->flit_size + + nuca->bank_pda.area.h); nuca->nuca_pda.area.w = - nuca->columns * ((nuca->v_wire->wire_width + - nuca->v_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.w); + nuca->columns * ((nuca->v_wire->wire_width + nuca->v_wire->wire_spacing) * + nuca->router->flit_size + + nuca->bank_pda.area.w); } - diff --git a/cacti/nuca.h b/cacti/nuca.h index a00044e..1e6f8bc 100644 --- a/cacti/nuca.h +++ b/cacti/nuca.h @@ -29,73 +29,63 @@ * ***************************************************************************/ - #ifndef __NUCA_H__ #define __NUCA_H__ -#include "basic_circuit.h" -#include "component.h" -#include "parameter.h" 
#include "assert.h" +#include "basic_circuit.h" #include "cacti_interface.h" -#include "wire.h" -#include "mat.h" +#include "component.h" #include "io.h" +#include "mat.h" +#include "parameter.h" #include "router.h" -#include - +#include "wire.h" +#include class nuca_org_t { - public: +public: ~nuca_org_t(); -// int size; - /* area, power, access time, and cycle time stats */ - Component nuca_pda; - Component bank_pda; - Component wire_pda; - Wire *h_wire; - Wire *v_wire; - Router *router; - /* for particular network configuration - * calculated based on a cycle accurate - * simulation Ref: CACTI 6 - Tech report - */ - double contention; - - /* grid network stats */ - double avg_hops; - int rows; - int columns; - int bank_count; + // int size; + /* area, power, access time, and cycle time stats */ + Component nuca_pda; + Component bank_pda; + Component wire_pda; + Wire *h_wire; + Wire *v_wire; + Router *router; + /* for particular network configuration + * calculated based on a cycle accurate + * simulation Ref: CACTI 6 - Tech report + */ + double contention; + + /* grid network stats */ + double avg_hops; + int rows; + int columns; + int bank_count; }; - - -class Nuca : public Component -{ - public: - Nuca( - TechnologyParameter::DeviceType *dt); - void print_router(); - ~Nuca(); - void sim_nuca(); - void init_cont(); - int calc_cycles(double lat, double oper_freq); - void calculate_nuca_area (nuca_org_t *nuca); - int check_nuca_org (nuca_org_t *n, min_values_t *minval); - nuca_org_t * find_optimal_nuca (list *n, min_values_t *minval); - void print_nuca(nuca_org_t *n); - void print_cont_stats(); - - private: - - TechnologyParameter::DeviceType *deviceType; - int wt_min, wt_max; - Wire *wire_vertical[WIRE_TYPES], - *wire_horizontal[WIRE_TYPES]; - +class Nuca : public Component { +public: + Nuca(TechnologyParameter::DeviceType *dt); + void print_router(); + ~Nuca(); + void sim_nuca(); + void init_cont(); + int calc_cycles(double lat, double oper_freq); + void 
calculate_nuca_area(nuca_org_t *nuca); + int check_nuca_org(nuca_org_t *n, min_values_t *minval); + nuca_org_t *find_optimal_nuca(list *n, min_values_t *minval); + void print_nuca(nuca_org_t *n); + void print_cont_stats(); + +private: + TechnologyParameter::DeviceType *deviceType; + int wt_min, wt_max; + Wire *wire_vertical[WIRE_TYPES], *wire_horizontal[WIRE_TYPES]; }; - #endif diff --git a/cacti/parameter.cc b/cacti/parameter.cc index c4c4a92..d22e231 100644 --- a/cacti/parameter.cc +++ b/cacti/parameter.cc @@ -29,123 +29,163 @@ * ***************************************************************************/ +#include "parameter.h" +#include "area.h" +#include #include #include -#include - -#include "parameter.h" -#include "area.h" using namespace std; - -InputParameter * g_ip; +InputParameter *g_ip; TechnologyParameter g_tp; - - -void TechnologyParameter::DeviceType::display(uint32_t indent) -{ +void TechnologyParameter::DeviceType::display(uint32_t indent) { string indent_str(indent, ' '); - cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; - cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; - cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; - cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; - cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; - cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; - cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; - cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; - cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; - cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; - cout << indent_str << "Vdd_default = " << setw(12) << Vdd_default << " V" << endl; - cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; - cout << indent_str << "I_on_p = " << 
setw(12) << I_on_p << " A/um" << endl; - cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; - cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; - cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; - cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; - cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; + cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" + << endl; + cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" + << endl; + cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" + << endl; + cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" + << endl; + cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; + cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; + cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" + << endl; + cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" + << endl; + cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; + cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; + cout << indent_str << "Vdd_default = " << setw(12) << Vdd_default << " V" + << endl; + cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; + cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; + cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" + << endl; + cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" + << endl; + cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; + cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; + cout << indent_str + << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; } - - -void TechnologyParameter::InterconnectType::display(uint32_t indent) -{ +void 
TechnologyParameter::InterconnectType::display(uint32_t indent) { string indent_str(indent, ' '); - cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; - cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; - cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; + cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; + cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" + << endl; + cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" + << endl; } -void TechnologyParameter::ScalingFactor::display(uint32_t indent) -{ +void TechnologyParameter::ScalingFactor::display(uint32_t indent) { string indent_str(indent, ' '); - cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; - cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; + cout << indent_str << "logic_scaling_co_eff = " << setw(12) + << logic_scaling_co_eff << endl; + cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density + << " # of tx/um^2" << endl; } -void TechnologyParameter::MemoryType::display(uint32_t indent) -{ +void TechnologyParameter::MemoryType::display(uint32_t indent) { string indent_str(indent, ' '); cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; - cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; - cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; - cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; + cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" + << endl; + cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" + << endl; + cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" + << endl; cout << indent_str 
<< "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; } - - -void TechnologyParameter::display(uint32_t indent) -{ +void TechnologyParameter::display(uint32_t indent) { string indent_str(indent, ' '); - cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; - cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; - cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; - cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; - cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; - cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; - cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; - cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; - cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; - cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; - cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; - cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; - cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; - cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; + cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) + << ram_wl_stitching_overhead_ << " um" << endl; + cout << indent_str << "min_w_nmos_ = " << setw(12) + << min_w_nmos_ << " um" << endl; + cout << indent_str << "max_w_nmos_ = " << setw(12) + << max_w_nmos_ << " um" << endl; + cout << indent_str << "unit_len_wire_del = " << setw(12) + << unit_len_wire_del << " s/um^2" << endl; + cout << indent_str << "FO4 = " << setw(12) << FO4 + << " s" << endl; + cout << indent_str << "kinv = " << setw(12) << kinv + << " s" << endl; + cout << indent_str << 
"vpp = " << setw(12) << vpp + << " V" << endl; + cout << indent_str << "w_sense_en = " << setw(12) + << w_sense_en << " um" << endl; + cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n + << " um" << endl; + cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p + << " um" << endl; + cout << indent_str << "w_iso = " << setw(12) << w_iso + << " um" << endl; + cout << indent_str << "w_poly_contact = " << setw(12) + << w_poly_contact << " um" << endl; + cout << indent_str << "spacing_poly_to_poly = " << setw(12) + << spacing_poly_to_poly << " um" << endl; + cout << indent_str << "spacing_poly_to_contact = " << setw(12) + << spacing_poly_to_contact << " um" << endl; cout << endl; - cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; - cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; - cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; - cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; - cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; - cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; - cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; - cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl; - cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; - cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; + cout << indent_str << "w_comp_inv_p1 = " << setw(12) + << w_comp_inv_p1 << " um" << endl; + cout << indent_str << "w_comp_inv_p2 = " << setw(12) + << w_comp_inv_p2 << " um" << endl; + cout << indent_str << "w_comp_inv_p3 = " << setw(12) + << w_comp_inv_p3 << " um" << endl; + cout << indent_str << "w_comp_inv_n1 = " << setw(12) + << w_comp_inv_n1 << " um" << endl; + cout << indent_str << "w_comp_inv_n2 = " << 
setw(12) + << w_comp_inv_n2 << " um" << endl; + cout << indent_str << "w_comp_inv_n3 = " << setw(12) + << w_comp_inv_n3 << " um" << endl; + cout << indent_str << "w_eval_inv_p = " << setw(12) + << w_eval_inv_p << " um" << endl; + cout << indent_str << "w_eval_inv_n = " << setw(12) + << w_eval_inv_n << " um" << endl; + cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n + << " um" << endl; + cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p + << " um" << endl; cout << endl; - cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; - cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; - cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; - cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; - cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; + cout << indent_str << "dram_cell_I_on = " << setw(12) + << dram_cell_I_on << " A/um" << endl; + cout << indent_str << "dram_cell_Vdd = " << setw(12) + << dram_cell_Vdd << " V" << endl; + cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) + << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; + cout << indent_str << "dram_cell_C = " << setw(12) + << dram_cell_C << " F" << endl; + cout << indent_str << "gm_sense_amp_latch = " << setw(12) + << gm_sense_amp_latch << " F/s" << endl; cout << endl; - cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; - cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; - cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; - cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; - cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " 
um" << endl; - cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; - cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; + cout << indent_str << "w_nmos_b_mux = " << setw(12) + << w_nmos_b_mux << " um" << endl; + cout << indent_str << "w_nmos_sa_mux = " << setw(12) + << w_nmos_sa_mux << " um" << endl; + cout << indent_str << "w_pmos_bl_precharge = " << setw(12) + << w_pmos_bl_precharge << " um" << endl; + cout << indent_str << "w_pmos_bl_eq = " << setw(12) + << w_pmos_bl_eq << " um" << endl; + cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) + << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; + cout << indent_str << "HPOWERRAIL = " << setw(12) + << HPOWERRAIL << " um" << endl; + cout << indent_str << "cell_h_def = " << setw(12) + << cell_h_def << " um" << endl; cout << endl; cout << indent_str << "SRAM cell transistor: " << endl; @@ -184,459 +224,455 @@ void TechnologyParameter::display(uint32_t indent) dram.display(indent + 2); } - -DynamicParameter::DynamicParameter(): - use_inp_params(0), cell(), is_valid(true) -{ -} - - - -DynamicParameter::DynamicParameter( - bool is_tag_, - int pure_ram_, - int pure_cam_, - double Nspd_, - unsigned int Ndwl_, - unsigned int Ndbl_, - unsigned int Ndcm_, - unsigned int Ndsam_lev_1_, - unsigned int Ndsam_lev_2_, - bool is_main_mem_): - is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), - Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), - number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), - is_main_mem(is_main_mem_), cell(), is_valid(false) -{ - ram_cell_tech_type = (is_tag) ? 
g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - - unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer - const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; +DynamicParameter::DynamicParameter() + : use_inp_params(0), cell(), is_valid(true) {} + +DynamicParameter::DynamicParameter(bool is_tag_, int pure_ram_, int pure_cam_, + double Nspd_, unsigned int Ndwl_, + unsigned int Ndbl_, unsigned int Ndcm_, + unsigned int Ndsam_lev_1_, + unsigned int Ndsam_lev_2_, bool is_main_mem_) + : is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), + Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_), + Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), + number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), + is_main_mem(is_main_mem_), cell(), is_valid(false) { + ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type + : g_ip->data_arr_ram_cell_tech_type; + is_dram = + ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + + unsigned int capacity_per_die = + g_ip->cache_sz / + NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer + const TechnologyParameter::InterconnectType &wire_local = g_tp.wire_local; fully_assoc = (g_ip->fully_assoc) ? 
true : false; - if (fully_assoc || pure_cam) - { // fully-assocative cache -- ref: CACTi 2.0 report - if (Ndwl != 1 || //Ndwl is fixed to 1 for FA - Ndcm != 1 || //Ndcm is fixed to 1 for FA - Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA - Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one - Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one - Ndbl < 2) - { - return; - } + if (fully_assoc || + pure_cam) { // fully-assocative cache -- ref: CACTi 2.0 report + if (Ndwl != 1 || // Ndwl is fixed to 1 for FA + Ndcm != 1 || // Ndcm is fixed to 1 for FA + Nspd < 1 || Nspd > 1 || // Nspd is fixed to 1 for FA + Ndsam_lev_1 != 1 || // Ndsam_lev_1 is fixed to one + Ndsam_lev_2 != 1 || // Ndsam_lev_2 is fixed to one + Ndbl < 2) { + return; + } } - if ((is_dram) && (!is_tag) && (Ndcm > 1)) - { - return; // For a DRAM array, each bitline has its own sense-amp + if ((is_dram) && (!is_tag) && (Ndcm > 1)) { + return; // For a DRAM array, each bitline has its own sense-amp } - // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be - // at least two because an array is assumed to have at least one mat. And a mat - // is formed out of two horizontal subarrays and two vertical subarrays - if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) - { - return; + // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl + // should be at least two because an array is assumed to have at least one + // mat. 
And a mat is formed out of two horizontal subarrays and two vertical + // subarrays + if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) { + return; } //***********compute row, col of an subarray - if (!(fully_assoc || pure_cam))//Not fully_asso nor cam + if (!(fully_assoc || pure_cam)) // Not fully_asso nor cam { - // if data array, let tagbits = 0 - if (is_tag) - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + - _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks); - - } - tagbits = (((tagbits + 3) >> 2) << 2); - - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); - //burst_length = 1; - } - else - { - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); - // burst_length = g_ip->block_sz * 8 / g_ip->out_w; - } - - if (num_r_subarray < MINSUBARRAYROWS) return; - if (num_r_subarray == 0) return; - if (num_r_subarray > MAXSUBARRAYROWS) return; - if (num_c_subarray < MINSUBARRAYCOLS) return; - if (num_c_subarray > MAXSUBARRAYCOLS) return; + // if data array, let tagbits = 0 + if (is_tag) { + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + + _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks); + } + tagbits = (((tagbits + 3) >> 2) << 2); + + num_r_subarray = (int)ceil( + capacity_per_die / (g_ip->nbanks * g_ip->block_sz * g_ip->tag_assoc * + Ndbl * Nspd)); // + EPSILON); + num_c_subarray = + (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl)); // + EPSILON); + // burst_length = 1; + } else { + num_r_subarray = (int)ceil( + capacity_per_die / 
(g_ip->nbanks * g_ip->block_sz * g_ip->data_assoc * + Ndbl * Nspd)); // + EPSILON); + num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / + Ndwl)); // + EPSILON); + EPSILON); + // burst_length = g_ip->block_sz * 8 / g_ip->out_w; + } + + if (num_r_subarray < MINSUBARRAYROWS) + return; + if (num_r_subarray == 0) + return; + if (num_r_subarray > MAXSUBARRAYROWS) + return; + if (num_c_subarray < MINSUBARRAYCOLS) + return; + if (num_c_subarray > MAXSUBARRAYCOLS) + return; } - else - {//either fully-asso or cam - if (pure_cam) - { - if (g_ip->specific_tag) - { - tagbits = int(ceil(g_ip->tag_w/8.0)*8); - } - else - { - tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); -// cout<<"Pure CAM needs tag width to be specified"<> 2) << 2); - - tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. - //tag_num_c_subarray = (int)(tagbits + EPSILON); - tag_num_c_subarray = tagbits; - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } - else //fully associative - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. 
- } - tagbits = (((tagbits + 3) >> 2) << 2); - - tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); - tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - - data_num_r_subarray = tag_num_r_subarray; - data_num_c_subarray = 8 * g_ip->block_sz; - if (data_num_r_subarray == 0) return; - if (data_num_r_subarray > MAXSUBARRAYROWS) return; - if (data_num_c_subarray < MINSUBARRAYCOLS) return; - if (data_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } + else { // either fully-asso or cam + if (pure_cam) { + if (g_ip->specific_tag) { + tagbits = int(ceil(g_ip->tag_w / 8.0) * 8); + } else { + tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8); + // cout<<"Pure CAM needs tag width to be + // specified"<> 2) << 2); + + tag_num_r_subarray = (int)ceil( + capacity_per_die / + (g_ip->nbanks * tagbits / 8.0 * + Ndbl)); // TODO: error check input of tagbits and blocksize //TODO: + // for pure CAM, g_ip->block should be number of entries. + // tag_num_c_subarray = (int)(tagbits + EPSILON); + tag_num_c_subarray = tagbits; + if (tag_num_r_subarray == 0) + return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) + return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) + return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) + return; + num_r_subarray = tag_num_r_subarray; + } else // fully associative + { + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = + ADDRESS_BITS + EXTRA_TAG_BITS - + _log2(g_ip->block_sz); // TODO: should be the page_offset=log2(page + // size), but this info is not avail with + // CACTI, for McPAT this is no problem. 
+ } + tagbits = (((tagbits + 3) >> 2) << 2); + + tag_num_r_subarray = + (int)(capacity_per_die / (g_ip->nbanks * g_ip->block_sz * Ndbl)); + tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl)); // + EPSILON); + if (tag_num_r_subarray == 0) + return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) + return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) + return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) + return; + + data_num_r_subarray = tag_num_r_subarray; + data_num_c_subarray = 8 * g_ip->block_sz; + if (data_num_r_subarray == 0) + return; + if (data_num_r_subarray > MAXSUBARRAYROWS) + return; + if (data_num_c_subarray < MINSUBARRAYCOLS) + return; + if (data_num_c_subarray > MAXSUBARRAYCOLS) + return; + num_r_subarray = tag_num_r_subarray; + } } num_subarrays = Ndwl * Ndbl; //****************end of computation of row, col of an subarray // calculate wire parameters - if (fully_assoc || pure_cam) - { - cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) - + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - } - else - { - if(is_tag) - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + - g_ip->num_wr_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + 
g_ip->num_wr_ports + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + - wire_local.pitch * g_ip->num_se_rd_ports; - } - else - { - if (is_dram) - { - cell.h = g_tp.dram.b_h; - cell.w = g_tp.dram.b_w; - } - else - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + - g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + - g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; - } - } + if (fully_assoc || pure_cam) { + cam_cell.h = + g_tp.cam.b_h + + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + cam_cell.w = + g_tp.cam.b_w + + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + + cell.h = + g_tp.sram.b_h + + 2 * wire_local.pitch * + (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1); + cell.w = g_tp.sram.b_w + + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1); + } else { + if (is_tag) { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + + g_ip->num_rd_ports + g_ip->num_wr_ports); + cell.w = g_tp.sram.b_w + + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + + wire_local.pitch * g_ip->num_se_rd_ports; + } else { + if (is_dram) { + cell.h = g_tp.dram.b_h; + cell.w = g_tp.dram.b_w; + } else { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * + (g_ip->num_wr_ports + g_ip->num_rw_ports - 
+ 1 + g_ip->num_rd_ports); + cell.w = g_tp.sram.b_w + + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; + } + } } double c_b_metal = cell.h * wire_local.C_per_um; double C_bl; - if (!(fully_assoc || pure_cam)) - { - if (is_dram) - { - deg_bl_muxing = 1; - if (ram_cell_tech_type == comm_dram) - { - C_bl = num_r_subarray * c_b_metal; - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); - if (V_b_sense < VBITSENSEMIN) - { - return; - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - dram_refresh_period = 64e-3; - } - else - { - double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); - - if (V_b_sense < VBITSENSEMIN) - { - return; //Sense amp input signal is smaller that minimum allowable sense amp input signal - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; - //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; - dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; - } - } - else - { //SRAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 
0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = Ndcm; - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; - } - } - else - { - c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = 1;//FA fix as 1 - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; + if (!(fully_assoc || pure_cam)) { + if (is_dram) { + deg_bl_muxing = 1; + if (ram_cell_tech_type == comm_dram) { + C_bl = num_r_subarray * c_b_metal; + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + if (V_b_sense < VBITSENSEMIN) { + return; + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal + // to a constant value + dram_refresh_period = 64e-3; + } else { + double Cbitrow_drain_cap = + drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + + if (V_b_sense < VBITSENSEMIN) { + return; // Sense amp input signal is smaller that minimum allowable + // sense amp input signal + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal + // to a constant value + // v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * + // 
(g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; dram_refresh_period + // = 1.1 * g_tp.dram_cell_C * v_storage_worst / + // g_tp.dram_cell_I_off_worst_case_len_temp; + dram_refresh_period = + 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * + g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; + } + } else { // SRAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) + ? 0.05 * g_tp.sram_cell.Vdd + : VBITSENSEMIN; + deg_bl_muxing = Ndcm; + // "/ 2.0" below is due to the fact that two adjacent access transistors + // share drain contacts in a physical layout + double Cbitrow_drain_cap = + drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } + } else { + c_b_metal = + cam_cell.h * + wire_local + .C_per_um; // IBM and SUN design, SRAM array uses dummy cells to + // fill the blank space due to mismatch on CAM-RAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) + ? 0.05 * g_tp.sram_cell.Vdd + : VBITSENSEMIN; + deg_bl_muxing = 1; // FA fix as 1 + // "/ 2.0" below is due to the fact that two adjacent access transistors + // share drain contacts in a physical layout + double Cbitrow_drain_cap = + drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / + 2.0; // TODO: comment out these two lines + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; } - - // do/di: data in/out, for fully associative they are the data width for normal read and write - // so/si: search data in/out, for fully associative they are the data width for the search ops - // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) - // so/si needs broadcase while do/di do not - - if (fully_assoc || pure_cam) - { - switch (Ndbl) { - case (0): - cout << " Invalid Ndbl \n"<int_prefetch_w * g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { - if (g_ip->fast_access == true) - { - num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { - - num_do_b_subbank = g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; - if (deg_sa_mux_l1_non_assoc < 1) - { - return; - } - - } - } - } - else - { - num_do_b_subbank = tagbits * g_ip->tag_assoc; - if (num_do_b_mat < tagbits) - { - return; - } - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; - } - } - else - { - if (fully_assoc) - { - num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa - num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; - } - else - { - num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data - num_do_b_subbank = tag_num_c_subarray; - } - - deg_sa_mux_l1_non_assoc = 1; + // TODO:the i/o for subbank is not necessary and should be removed. 
+ if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (is_main_mem == true) { + num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { + if (g_ip->fast_access == true) { + num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { + + num_do_b_subbank = g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; + if (deg_sa_mux_l1_non_assoc < 1) { + return; + } + } + } + } else { + num_do_b_subbank = tagbits * g_ip->tag_assoc; + if (num_do_b_mat < tagbits) { + return; + } + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + // num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; + } + } else { + if (fully_assoc) { + num_so_b_subbank = 8 * g_ip->block_sz; // TODO:internal perfetch should be + // considered also for fa + num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; + } else { + num_so_b_subbank = int( + ceil(log2(num_r_subarray)) + + ceil(log2(num_subarrays))); // the address contains the matched data + num_do_b_subbank = tag_num_c_subarray; + } + + deg_sa_mux_l1_non_assoc = 1; } deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; - if (fully_assoc || pure_cam) - { - num_act_mats_hor_dir = 1; - num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used - } - else - { - num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; - if (num_act_mats_hor_dir == 0) - { - return; - } + if (fully_assoc || pure_cam) { + num_act_mats_hor_dir = 1; + num_act_mats_hor_dir_sl = + num_mats_h_dir; // TODO: this is unnecessary, since search op, num_mats + // is used + } else { + num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; + if (num_act_mats_hor_dir == 0) { + return; + } } - //compute num_do_mat for tag - if (is_tag) - { - if (!(fully_assoc || pure_cam)) - { - num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; - num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; - } + // compute num_do_mat for tag + 
if (is_tag) { + if (!(fully_assoc || pure_cam)) { + num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; + num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; + } } - if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) - { - if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) - { - return; - } + if ((g_ip->is_cache == false && is_main_mem == true) || + (PAGE_MODE == 1 && is_dram)) { + if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != + (int)g_ip->page_sz_bits) { + return; + } } -// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays + // if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam + // && //TODO: TODO burst transfer should also apply to RAM arrays if (is_tag == false && g_ip->is_main_mem == true && - num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) - { - return; + num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 < + ((int)g_ip->out_w * (int)g_ip->burst_len * (int)g_ip->data_assoc)) { + return; } - if (num_act_mats_hor_dir > num_mats_h_dir) - { - return; - } - - - //compute di for mat subbank and bank - if (!(fully_assoc ||pure_cam)) - { - if(!is_tag) - { - if(g_ip->fast_access == true) - { - num_di_b_mat = num_do_b_mat / g_ip->data_assoc; - } - else - { - num_di_b_mat = num_do_b_mat; - } - } - else - { - num_di_b_mat = tagbits; - } + if (num_act_mats_hor_dir > num_mats_h_dir) { + return; } - else - { - if (fully_assoc) - { - num_di_b_mat = num_do_b_mat; - //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, - //but inside the mat wire tracks need to be reserved for search data bus - num_si_b_mat = tagbits; - } - else - { - num_di_b_mat = tagbits; - num_si_b_mat = tagbits;//*num_subarrays/num_mats; - } + // compute di for mat subbank and 
bank + if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (g_ip->fast_access == true) { + num_di_b_mat = num_do_b_mat / g_ip->data_assoc; + } else { + num_di_b_mat = num_do_b_mat; + } + } else { + num_di_b_mat = tagbits; + } + } else { + if (fully_assoc) { + num_di_b_mat = num_do_b_mat; + //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, + // but inside the mat wire tracks need to be reserved for search data bus + num_si_b_mat = tagbits; + } else { + num_di_b_mat = tagbits; + num_si_b_mat = tagbits; //*num_subarrays/num_mats; + } } - num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA - num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + num_di_b_subbank = + num_di_b_mat * num_act_mats_hor_dir; // normal cache or normal r/w for FA + num_si_b_subbank = + num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast - int num_addr_b_row_dec = _log2(num_r_subarray); - if ((fully_assoc ||pure_cam)) - num_addr_b_row_dec +=_log2(num_subarrays/num_mats); - int number_subbanks = num_mats / num_act_mats_hor_dir; - number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + int num_addr_b_row_dec = _log2(num_r_subarray); + if ((fully_assoc || pure_cam)) + num_addr_b_row_dec += _log2(num_subarrays / num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = + _log2(number_subbanks); // TODO: add log2(num_subarray_per_bank) to FA/CAM num_rw_ports = g_ip->num_rw_ports; num_rd_ports = g_ip->num_rd_ports; @@ -644,71 +680,64 @@ DynamicParameter::DynamicParameter( num_se_rd_ports = g_ip->num_se_rd_ports; num_search_ports = g_ip->num_search_ports; - if (is_dram && is_main_mem) - { - number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, - _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); - } - else - { - number_addr_bits_mat = num_addr_b_row_dec + 
_log2(deg_bl_muxing) + - _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + if (is_dram && is_main_mem) { + number_addr_bits_mat = + MAX((unsigned int)num_addr_b_row_dec, + _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + + _log2(Ndsam_lev_2)); + } else { + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); } - if (!(fully_assoc ||pure_cam)) - { - if (is_tag) - { - num_di_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->data_assoc; - } - else - { - num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; - num_do_b_bank_per_port = g_ip->out_w; - } - } - else - { - if (fully_assoc) - { - num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->out_w + tagbits; - num_so_b_bank_per_port = g_ip->out_w; - } - else - { - num_di_b_bank_per_port = tagbits; - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = tagbits; - num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); - } + if (!(fully_assoc || pure_cam)) { + if (is_tag) { + num_di_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->data_assoc; + } else { + num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; + num_do_b_bank_per_port = g_ip->out_w; + } + } else { + if (fully_assoc) { + num_di_b_bank_per_port = g_ip->out_w + tagbits; // TODO: out_w or + // block_sz? 
+ num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->out_w + tagbits; + num_so_b_bank_per_port = g_ip->out_w; + } else { + num_di_b_bank_per_port = tagbits; + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = tagbits; + num_so_b_bank_per_port = + int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); + } } - if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) - { - number_way_select_signals_mat = g_ip->data_assoc; + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) { + number_way_select_signals_mat = g_ip->data_assoc; } // add ECC adjustment to all data signals that traverse on H-trees. - if (g_ip->add_ecc_b_ == true) - { - num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); - num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); - num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); - num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); - num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); - num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); - - num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); - num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); - num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); - num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); - num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); - num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); + if (g_ip->add_ecc_b_ == true) { + num_do_b_mat += (int)(ceil(num_do_b_mat / num_bits_per_ecc_b_)); + num_di_b_mat += (int)(ceil(num_di_b_mat / num_bits_per_ecc_b_)); + num_di_b_subbank += (int)(ceil(num_di_b_subbank / num_bits_per_ecc_b_)); + num_do_b_subbank += (int)(ceil(num_do_b_subbank / num_bits_per_ecc_b_)); + num_di_b_bank_per_port += + 
(int)(ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); + num_do_b_bank_per_port += + (int)(ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); + + num_so_b_mat += (int)(ceil(num_so_b_mat / num_bits_per_ecc_b_)); + num_si_b_mat += (int)(ceil(num_si_b_mat / num_bits_per_ecc_b_)); + num_si_b_subbank += (int)(ceil(num_si_b_subbank / num_bits_per_ecc_b_)); + num_so_b_subbank += (int)(ceil(num_so_b_subbank / num_bits_per_ecc_b_)); + num_si_b_bank_per_port += + (int)(ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); + num_so_b_bank_per_port += + (int)(ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); } is_valid = true; } - diff --git a/cacti/parameter.h b/cacti/parameter.h index 2d40f86..c1066ba 100644 --- a/cacti/parameter.h +++ b/cacti/parameter.h @@ -29,27 +29,23 @@ * ***************************************************************************/ - - #ifndef __PARAMETER_H__ #define __PARAMETER_H__ #include "area.h" -#include "const.h" #include "cacti_interface.h" +#include "const.h" #include "io.h" // parameters which are functions of certain device technology -class TechnologyParameter -{ - public: - class DeviceType - { - public: +class TechnologyParameter { +public: + class DeviceType { + public: double C_g_ideal; double C_fringe; double C_overlap; - double C_junc; // C_junc_area + double C_junc; // C_junc_area double C_junc_sidewall; double l_phy; double l_elec; @@ -58,8 +54,15 @@ class TechnologyParameter double Vdd; double Vdd_default; double Vth; - double Vcc_min_default;//allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency; This is the value constrained by the IC technology and cannot by changed by external/user voltage supply - double Vcc_min;//same meaning as Vcc_min_default, however, this value is set by user, once it is lower than Vcc_min_default; circuit (e.g. SRAM cells) cannot retain state. 
+ double Vcc_min_default; // allowed min vcc; for memory cell it is the lowest + // vcc for data retention. for logic it is the vcc + // to balance the leakage reduction and wakeup + // latency; This is the value constrained by the IC + // technology and cannot by changed by external/user + // voltage supply + double Vcc_min; // same meaning as Vcc_min_default, however, this value is + // set by user, once it is lower than Vcc_min_default; + // circuit (e.g. SRAM cells) cannot retain state. double I_on_n; double I_on_p; double I_off_n; @@ -72,35 +75,35 @@ class TechnologyParameter double long_channel_leakage_reduction; double Mobility_n; - DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), - C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), - Vdd(0), Vdd_default(0), Vth(0), Vcc_min(0), - I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0), - C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0), - Mobility_n(0) { }; - void reset() - { + DeviceType() + : C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), + C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), + Vdd(0), Vdd_default(0), Vth(0), Vcc_min(0), I_on_n(0), I_on_p(0), + I_off_n(0), I_off_p(0), I_g_on_n(0), I_g_on_p(0), C_ox(0), t_ox(0), + n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0), + Mobility_n(0){}; + void reset() { C_g_ideal = 0; - C_fringe = 0; + C_fringe = 0; C_overlap = 0; - C_junc = 0; - l_phy = 0; - l_elec = 0; - R_nch_on = 0; - R_pch_on = 0; - Vdd = 0; - Vdd_default =0; - Vth = 0; + C_junc = 0; + l_phy = 0; + l_elec = 0; + R_nch_on = 0; + R_pch_on = 0; + Vdd = 0; + Vdd_default = 0; + Vth = 0; Vcc_min_default = 0; - Vcc_min = 0; - I_on_n = 0; - I_on_p = 0; - I_off_n = 0; - I_off_p = 0; - I_g_on_n = 0; - I_g_on_p = 0; - C_ox = 0; - t_ox = 0; + Vcc_min = 0; + I_on_n = 0; + I_on_p = 0; + I_off_n = 0; + I_off_p = 0; + I_g_on_n = 0; + I_g_on_p = 0; + C_ox = 0; + t_ox = 0; 
n_to_p_eff_curr_drv_ratio = 0; long_channel_leakage_reduction = 0; Mobility_n = 0; @@ -108,9 +111,8 @@ class TechnologyParameter void display(uint32_t indent = 0); }; - class InterconnectType - { - public: + class InterconnectType { + public: double pitch; double R_per_um; double C_per_um; @@ -120,10 +122,9 @@ class TechnologyParameter double miller_value; double ild_thickness; - InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; + InterconnectType() : pitch(0), R_per_um(0), C_per_um(0){}; - void reset() - { + void reset() { pitch = 0; R_per_um = 0; C_per_um = 0; @@ -136,19 +137,17 @@ class TechnologyParameter void display(uint32_t indent = 0); }; - class MemoryType - { - public: + class MemoryType { + public: double b_w; double b_h; double cell_a_w; double cell_pmos_w; double cell_nmos_w; double Vbitpre; - double Vbitfloating;//voltage when floating bitline is supported + double Vbitfloating; // voltage when floating bitline is supported - void reset() - { + void reset() { b_w = 0; b_h = 0; cell_a_w = 0; @@ -161,21 +160,20 @@ class TechnologyParameter void display(uint32_t indent = 0); }; - class ScalingFactor - { - public: + class ScalingFactor { + public: double logic_scaling_co_eff; double core_tx_density; double long_channel_leakage_reduction; - ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), - long_channel_leakage_reduction(0) { }; + ScalingFactor() + : logic_scaling_co_eff(0), core_tx_density(0), + long_channel_leakage_reduction(0){}; - void reset() - { - logic_scaling_co_eff= 0; + void reset() { + logic_scaling_co_eff = 0; core_tx_density = 0; - long_channel_leakage_reduction= 0; + long_channel_leakage_reduction = 0; } void display(uint32_t indent = 0); @@ -237,9 +235,9 @@ class TechnologyParameter DeviceType dram_acc; // DRAM access transistor DeviceType dram_wl; // DRAM wordline transistor DeviceType peri_global; // peripheral global - DeviceType cam_cell; // SRAM cell transistor + DeviceType cam_cell; // SRAM cell transistor - 
DeviceType sleep_tx; // Sleep transistor cell transistor + DeviceType sleep_tx; // Sleep transistor cell transistor InterconnectType wire_local; InterconnectType wire_inside_mat; @@ -253,21 +251,20 @@ class TechnologyParameter void display(uint32_t indent = 0); - void reset() - { - dram_cell_Vdd = 0; + void reset() { + dram_cell_Vdd = 0; dram_cell_I_on = 0; - dram_cell_C = 0; - vpp = 0; - - sense_delay = 0; - sense_dy_power = 0; - fringe_cap = 0; -// horiz_dielectric_constant = 0; -// vert_dielectric_constant = 0; -// aspect_ratio = 0; -// miller_value = 0; -// ild_thickness = 0; + dram_cell_C = 0; + vpp = 0; + + sense_delay = 0; + sense_dy_power = 0; + fringe_cap = 0; + // horiz_dielectric_constant = 0; + // vert_dielectric_constant = 0; + // aspect_ratio = 0; + // miller_value = 0; + // ild_thickness = 0; dram_cell_I_off_worst_case_len_temp = 0; @@ -288,94 +285,83 @@ class TechnologyParameter dram.reset(); cam.reset(); - chip_layout_overhead = 0; + chip_layout_overhead = 0; macro_layout_overhead = 0; - sckt_co_eff = 0; + sckt_co_eff = 0; } }; - - -class DynamicParameter -{ - public: - bool is_tag; - bool pure_ram; - bool pure_cam; - bool fully_assoc; - int tagbits; - int num_subarrays; // only for leakage computation -- the number of subarrays per bank - int num_mats; // only for leakage computation -- the number of mats per bank - double Nspd; - int Ndwl; - int Ndbl; - int Ndcm; - int deg_bl_muxing; - int deg_senseamp_muxing_non_associativity; - int Ndsam_lev_1; - int Ndsam_lev_2; - int number_addr_bits_mat; // per port - int number_subbanks_decode; // per_port - int num_di_b_bank_per_port; - int num_do_b_bank_per_port; - int num_di_b_mat; - int num_do_b_mat; - int num_di_b_subbank; - int num_do_b_subbank; - - int num_si_b_mat; - int num_so_b_mat; - int num_si_b_subbank; - int num_so_b_subbank; - int num_si_b_bank_per_port; - int num_so_b_bank_per_port; - - int number_way_select_signals_mat; - int num_act_mats_hor_dir; - - int num_act_mats_hor_dir_sl; - bool 
is_dram; - double V_b_sense; - unsigned int num_r_subarray; - unsigned int num_c_subarray; - int tag_num_r_subarray;//sheng: fully associative cache tag and data must be computed together, data and tag must be separate - int tag_num_c_subarray; - int data_num_r_subarray; - int data_num_c_subarray; - int num_mats_h_dir; - int num_mats_v_dir; - uint32_t ram_cell_tech_type; - double dram_refresh_period; - - DynamicParameter(); - DynamicParameter( - bool is_tag_, - int pure_ram_, - int pure_cam_, - double Nspd_, - unsigned int Ndwl_, - unsigned int Ndbl_, - unsigned int Ndcm_, - unsigned int Ndsam_lev_1_, - unsigned int Ndsam_lev_2_, - bool is_main_mem_); - - int use_inp_params; - unsigned int num_rw_ports; - unsigned int num_rd_ports; - unsigned int num_wr_ports; - unsigned int num_se_rd_ports; // number of single ended read ports - unsigned int num_search_ports; - unsigned int out_w;// == nr_bits_out - bool is_main_mem; - Area cell, cam_cell;//cell is the sram_cell in both nomal cache/ram and FA. 
- bool is_valid; +class DynamicParameter { +public: + bool is_tag; + bool pure_ram; + bool pure_cam; + bool fully_assoc; + int tagbits; + int num_subarrays; // only for leakage computation -- the number of subarrays + // per bank + int num_mats; // only for leakage computation -- the number of mats per bank + double Nspd; + int Ndwl; + int Ndbl; + int Ndcm; + int deg_bl_muxing; + int deg_senseamp_muxing_non_associativity; + int Ndsam_lev_1; + int Ndsam_lev_2; + int number_addr_bits_mat; // per port + int number_subbanks_decode; // per_port + int num_di_b_bank_per_port; + int num_do_b_bank_per_port; + int num_di_b_mat; + int num_do_b_mat; + int num_di_b_subbank; + int num_do_b_subbank; + + int num_si_b_mat; + int num_so_b_mat; + int num_si_b_subbank; + int num_so_b_subbank; + int num_si_b_bank_per_port; + int num_so_b_bank_per_port; + + int number_way_select_signals_mat; + int num_act_mats_hor_dir; + + int num_act_mats_hor_dir_sl; + bool is_dram; + double V_b_sense; + unsigned int num_r_subarray; + unsigned int num_c_subarray; + int tag_num_r_subarray; // sheng: fully associative cache tag and data must be + // computed together, data and tag must be separate + int tag_num_c_subarray; + int data_num_r_subarray; + int data_num_c_subarray; + int num_mats_h_dir; + int num_mats_v_dir; + uint32_t ram_cell_tech_type; + double dram_refresh_period; + + DynamicParameter(); + DynamicParameter(bool is_tag_, int pure_ram_, int pure_cam_, double Nspd_, + unsigned int Ndwl_, unsigned int Ndbl_, unsigned int Ndcm_, + unsigned int Ndsam_lev_1_, unsigned int Ndsam_lev_2_, + bool is_main_mem_); + + int use_inp_params; + unsigned int num_rw_ports; + unsigned int num_rd_ports; + unsigned int num_wr_ports; + unsigned int num_se_rd_ports; // number of single ended read ports + unsigned int num_search_ports; + unsigned int out_w; // == nr_bits_out + bool is_main_mem; + Area cell, cam_cell; // cell is the sram_cell in both nomal cache/ram and FA. 
+ bool is_valid; }; - - -extern InputParameter * g_ip; +extern InputParameter *g_ip; extern TechnologyParameter g_tp; #endif - diff --git a/cacti/powergating.cc b/cacti/powergating.cc index 8141927..cf0ec8b 100644 --- a/cacti/powergating.cc +++ b/cacti/powergating.cc @@ -29,119 +29,130 @@ * ***************************************************************************/ -#include "area.h" #include "powergating.h" + +#include "area.h" #include "parameter.h" + +#include #include #include -#include using namespace std; /* - * Sizing of sleep tx is independent of sleep/power-saving supply voltage, sleep/power-saving supply voltage only affects wake-up energy and time + * Sizing of sleep tx is independent of sleep/power-saving supply voltage, + * sleep/power-saving supply voltage only affects wake-up energy and time * * While using DSTN (Distributed sleep tx network), worst case sizing is used. - * For DSTN, the network can help to reduce the runtime latency (or achieve the same latency with smaller transistor size) - * For example, during write access, if not all bits are toggled, the sleep tx in the non-toggled path can work as the extra - * discharge paths of all the toggled bits, in addition to the sleep tx in the bitlines with the toggled bits. Since CACTI itself - * assumes worst case with all bits toggled, sleep txs are assumed to work all the time with all bits toggled, - * Therefore, although DTSN is used, for memory array, the number of sleep txs is related to the number of rows and cols., - * and all calculations are still base on single sleep tx for each discharge case. Of couse in each discharge path, the sleep - * tx is the charge path of all the devices in the same path (row or col). 
+ * For DSTN, the network can help to reduce the runtime latency (or achieve the + * same latency with smaller transistor size) For example, during write access, + * if not all bits are toggled, the sleep tx in the non-toggled path can work as + * the extra discharge paths of all the toggled bits, in addition to the sleep + * tx in the bitlines with the toggled bits. Since CACTI itself assumes worst + * case with all bits toggled, sleep txs are assumed to work all the time with + * all bits toggled, Therefore, although DTSN is used, for memory array, the + * number of sleep txs is related to the number of rows and cols., and all + * calculations are still base on single sleep tx for each discharge case. Of + * couse in each discharge path, the sleep tx is the charge path of all the + * devices in the same path (row or col). * * Even in the worse case sizing, the wakeup time will not change - * since all paths need to charge/discharge---each sleep tx is just do its own portion of the work during wakeup or entering sleep state. + * since all paths need to charge/discharge---each sleep tx is just do its own + * portion of the work during wakeup or entering sleep state. * - * Power-gating and DVS cannot happen at the same time! Because power-gating happens when circuit is idle, - * while DVS happens when circuit is active. - * When waking up from power-gating status, it is assumed that the system will first wakeup to DVS0 (full speed) state, if DVS is enabled in - * the system. + * Power-gating and DVS cannot happen at the same time! Because power-gating + * happens when circuit is idle, while DVS happens when circuit is active. When + * waking up from power-gating status, it is assumed that the system will first + * wakeup to DVS0 (full speed) state, if DVS is enabled in the system. 
* * * -*/ -Sleep_tx::Sleep_tx( - double _perf_with_sleep_tx, - double _active_Isat,//of circuit block, not sleep tx - bool _is_footer, - double _c_circuit_wakeup, - double _V_delta, - int _num_sleep_tx, -// double _vt_circuit, -// double _vt_sleep_tx, -// double _mobility,//of sleep tx -// double _c_ox,//of sleep tx - const Area & cell_) -:perf_with_sleep_tx(_perf_with_sleep_tx), - active_Isat(_active_Isat), - is_footer(_is_footer), - c_circuit_wakeup(_c_circuit_wakeup), - V_delta(_V_delta), - num_sleep_tx(_num_sleep_tx), -// vt_circuit(_vt_circuit), -// vt_sleep_tx(_vt_sleep_tx), -// mobility(_mobility), -// c_ox(_c_ox) - cell(cell_), - is_sleep_tx(true) -{ - - //a single sleep tx in a network - double raw_area, raw_width, raw_hight; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); - vdd = g_tp.peri_global.Vdd; - vt_circuit = g_tp.peri_global.Vth; - vt_sleep_tx = g_tp.sleep_tx.Vth; - mobility = g_tp.sleep_tx.Mobility_n; - c_ox = g_tp.sleep_tx.C_ox; + */ +Sleep_tx::Sleep_tx(double _perf_with_sleep_tx, + double _active_Isat, // of circuit block, not sleep tx + bool _is_footer, double _c_circuit_wakeup, double _V_delta, + int _num_sleep_tx, + // double _vt_circuit, + // double _vt_sleep_tx, + // double _mobility,//of sleep tx + // double _c_ox,//of sleep tx + const Area &cell_) + : perf_with_sleep_tx(_perf_with_sleep_tx), active_Isat(_active_Isat), + is_footer(_is_footer), c_circuit_wakeup(_c_circuit_wakeup), + V_delta(_V_delta), num_sleep_tx(_num_sleep_tx), + // vt_circuit(_vt_circuit), + // vt_sleep_tx(_vt_sleep_tx), + // mobility(_mobility), + // c_ox(_c_ox) + cell(cell_), is_sleep_tx(true) { - width = active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers - width /= num_sleep_tx; + // a single sleep tx in a network + double raw_area, raw_width, raw_hight; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); + vdd = g_tp.peri_global.Vdd; + vt_circuit = 
g_tp.peri_global.Vth; + vt_sleep_tx = g_tp.sleep_tx.Vth; + mobility = g_tp.sleep_tx.Mobility_n; + c_ox = g_tp.sleep_tx.C_ox; -// double cell_hight = MAX(cell.w*2, g_tp.cell_h_def); - raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.h)/2; //Only single device, assuming device is laid on the side of the circuit block without changing the height of the standard library cells (using the standard cell approach). - raw_width = cell.w; - raw_hight = raw_area/cell.w; - area.set_h(raw_hight); - area.set_w(raw_width); + width = active_Isat / + (perf_with_sleep_tx * mobility * c_ox * (vdd - vt_circuit) * + (vdd - vt_sleep_tx)) * + g_ip->F_sz_um; // W/L uses physical numbers + width /= num_sleep_tx; - compute_penalty(); + // double cell_hight = MAX(cell.w*2, g_tp.cell_h_def); + raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio * width, cell.h) / + 2; // Only single device, assuming device is laid on the side of + // the circuit block without changing the height of the standard + // library cells (using the standard cell approach). + raw_width = cell.w; + raw_hight = raw_area / cell.w; + area.set_h(raw_hight); + area.set_w(raw_width); + compute_penalty(); } -double Sleep_tx::compute_penalty() -{ - //V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although it might be OK to use sleep tx to control the V_delta - double c_load; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); +double Sleep_tx::compute_penalty() { + // V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. 
Although + // it might be OK to use sleep tx to control the V_delta + double c_load; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); - if (is_footer) - { - c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx); -// V_delta = _V_delta; - wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); - wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; - //no 0.5 because the half of the energy spend in entering sleep and half of the energy will be spent in waking up. And they are pairs - } - else - { - c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx); -// V_delta = _V_delta; - wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); - wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; - } + if (is_footer) { + c_intrinsic_sleep = + drain_C_(width, NCH, 1, 1, area.h, false, false, false, is_sleep_tx); + // V_delta = _V_delta; + wakeup_delay = + (c_circuit_wakeup + c_intrinsic_sleep) * V_delta / + (simplified_nmos_Isat(width, false, false, false, is_sleep_tx) / + Ilinear_to_Isat_ratio); + wakeup_power.readOp.dynamic = + (c_circuit_wakeup + c_intrinsic_sleep) * g_tp.sram_cell.Vdd * V_delta; + // no 0.5 because the half of the energy spend in entering sleep and half of + // the energy will be spent in waking up. 
And they are pairs + } else { + c_intrinsic_sleep = drain_C_(width * p_to_n_sz_ratio, PCH, 1, 1, area.h, + false, false, false, is_sleep_tx); + // V_delta = _V_delta; + wakeup_delay = + (c_circuit_wakeup + c_intrinsic_sleep) * V_delta / + (simplified_pmos_Isat(width, false, false, false, is_sleep_tx) / + Ilinear_to_Isat_ratio); + wakeup_power.readOp.dynamic = + (c_circuit_wakeup + c_intrinsic_sleep) * g_tp.sram_cell.Vdd * V_delta; + } -/* - The number of cycles in the wake-up latency set the constraint on the - minimum number of idle clock cycles needed before a processor - can enter in the corresponding sleep mode without any wakeup - overhead. - - If the circuit is half way to sleep then waken up, it is still OK - just the wakeup latency will be shorter than the wakeup time from full asleep. - So, the sleep time and energy does not matter -*/ + /* + The number of cycles in the wake-up latency set the constraint on the + minimum number of idle clock cycles needed before a processor + can enter in the corresponding sleep mode without any wakeup + overhead. + If the circuit is half way to sleep then waken up, it is still OK + just the wakeup latency will be shorter than the wakeup time from full + asleep. 
So, the sleep time and energy does not matter + */ } - diff --git a/cacti/powergating.h b/cacti/powergating.h index 9c8ee36..72415d8 100644 --- a/cacti/powergating.h +++ b/cacti/powergating.h @@ -34,49 +34,44 @@ #include "component.h" -class Sleep_tx : public Component -{ +class Sleep_tx : public Component { public: - Sleep_tx( - double _perf_with_sleep_tx, - double _active_Isat,//of circuit block, not sleep tx - bool _is_footer, - double _c_circuit_wakeup, - double _V_delta, - int _num_sleep_tx, - // double _vt_circuit, - // double _vt_sleep_tx, - // double _mobility,//of sleep tx - // double _c_ox,//of sleep tx - const Area & cell_); + Sleep_tx(double _perf_with_sleep_tx, + double _active_Isat, // of circuit block, not sleep tx + bool _is_footer, double _c_circuit_wakeup, double _V_delta, + int _num_sleep_tx, + // double _vt_circuit, + // double _vt_sleep_tx, + // double _mobility,//of sleep tx + // double _c_ox,//of sleep tx + const Area &cell_); - double perf_with_sleep_tx; - double active_Isat; - bool is_footer; - int num_sleep_tx; - double vt_circuit; - double vt_sleep_tx; - double vdd;// of circuit block not sleep tx - double mobility;//of sleep tx - double c_ox; - double width; - double c_circuit_wakeup; - double c_intrinsic_sleep; - double delay, wakeup_delay; - powerDef power, wakeup_power; -// double c_circuit_sleep; -// double sleep_delay; -// powerDef sleep_power; - const Area & cell; - bool is_sleep_tx; - double V_delta; + double perf_with_sleep_tx; + double active_Isat; + bool is_footer; + int num_sleep_tx; + double vt_circuit; + double vt_sleep_tx; + double vdd; // of circuit block not sleep tx + double mobility; // of sleep tx + double c_ox; + double width; + double c_circuit_wakeup; + double c_intrinsic_sleep; + double delay, wakeup_delay; + powerDef power, wakeup_power; + // double c_circuit_sleep; + // double sleep_delay; + // powerDef sleep_power; + const Area &cell; + bool is_sleep_tx; + double V_delta; + // void compute_area(); + double 
compute_penalty(); // return outrisetime -// void compute_area(); - double compute_penalty(); // return outrisetime - - void leakage_feedback(double temperature){}; - ~Sleep_tx(){}; + void leakage_feedback(double temperature){}; + ~Sleep_tx(){}; }; #endif /* POWERGATING_H_ */ diff --git a/cacti/router.cc b/cacti/router.cc index b8c22d3..635167d 100644 --- a/cacti/router.cc +++ b/cacti/router.cc @@ -29,123 +29,101 @@ * ***************************************************************************/ - - #include "router.h" -Router::Router( - double flit_size_, - double vc_buf, /* vc size = vc_buffer_size * flit_size */ - double vc_c, - TechnologyParameter::DeviceType *dt, - double I_, - double O_, - double M_ - ):flit_size(flit_size_), - deviceType(dt), - I(I_), - O(O_), - M(M_) -{ +Router::Router(double flit_size_, + double vc_buf, /* vc size = vc_buffer_size * flit_size */ + double vc_c, TechnologyParameter::DeviceType *dt, double I_, + double O_, double M_) + : flit_size(flit_size_), deviceType(dt), I(I_), O(O_), M(M_) { vc_buffer_size = vc_buf; vc_count = vc_c; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; double technology = g_ip->F_sz_um; Vdd = dt->Vdd; /*Crossbar parameters. Transmisson gate is employed for connector*/ - NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology*1e-6/2; /* pmos tr. length*/ - wt = 15*technology*1e-6/2; /*track width*/ - ht = 15*technology*1e-6/2; /*track height*/ -// I = 5; /*Number of crossbar input ports*/ -// O = 5; /*Number of crossbar output ports*/ - NTi = 12.5*technology*1e-6/2; - PTi = 25*technology*1e-6/2; - - NTid = 60*technology*1e-6/2; //m - PTid = 120*technology*1e-6/2; // m - NTod = 60*technology*1e-6/2; // m - PTod = 120*technology*1e-6/2; // m + NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. 
length*/ + wt = 15 * technology * 1e-6 / 2; /*track width*/ + ht = 15 * technology * 1e-6 / 2; /*track height*/ + // I = 5; /*Number of crossbar input ports*/ + // O = 5; /*Number of crossbar output ports*/ + NTi = 12.5 * technology * 1e-6 / 2; + PTi = 25 * technology * 1e-6 / 2; + + NTid = 60 * technology * 1e-6 / 2; // m + PTid = 120 * technology * 1e-6 / 2; // m + NTod = 60 * technology * 1e-6 / 2; // m + PTod = 120 * technology * 1e-6 / 2; // m calc_router_parameters(); } -Router::~Router(){} +Router::~Router() {} - -double //wire cap with triple spacing +double // wire cap with triple spacing Router::Cw3(double length) { Wire wc(g_ip->wt, length, 1, 3, 3); return (wc.wire_cap(length)); } /*Function to calculate the gate capacitance*/ -double -Router::gate_cap(double w) { - return (double) gate_C (w*1e6 /*u*/, 0); -} +double Router::gate_cap(double w) { return (double)gate_C(w * 1e6 /*u*/, 0); } /*Function to calculate the diffusion capacitance*/ -double -Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, - double s /*number of stacking transistors*/) { - return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); +double Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, + double s /*number of stacking transistors*/) { + return (double)drain_C_(w * 1e6 /*u*/, type, (int)s, 1, g_tp.cell_h_def); } - /*crossbar related functions */ // Model for simple transmission gate -double -Router::transmission_buf_inpcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); +double Router::transmission_buf_inpcap() { + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } -double -Router::transmission_buf_outcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); +double Router::transmission_buf_outcap() { + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } -double -Router::transmission_buf_ctrcap() { - return gate_cap(NTtr)+gate_cap(PTtr); +double Router::transmission_buf_ctrcap() { + return gate_cap(NTtr) + 
gate_cap(PTtr); } -double -Router::crossbar_inpline() { - return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + - gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); +double Router::crossbar_inpline() { + return (Cw3(O * flit_size * wt) + O * transmission_buf_inpcap() + + gate_cap(NTid) + gate_cap(PTid) + diff_cap(NTid, 0, 1) + + diff_cap(PTid, 1, 1)); } -double -Router::crossbar_outline() { - return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + - gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); +double Router::crossbar_outline() { + return (Cw3(I * flit_size * ht) + I * transmission_buf_outcap() + + gate_cap(NTod) + gate_cap(PTod) + diff_cap(NTod, 0, 1) + + diff_cap(PTod, 1, 1)); } -double -Router::crossbar_ctrline() { - return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + - diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + - gate_cap(NTi) + gate_cap(PTi)); +double Router::crossbar_ctrline() { + return (Cw3(0.5 * O * flit_size * wt) + + flit_size * transmission_buf_ctrcap() + diff_cap(NTi, 0, 1) + + diff_cap(PTi, 1, 1) + gate_cap(NTi) + gate_cap(PTi)); } -double -Router::tr_crossbar_power() { - return (crossbar_inpline()*Vdd*Vdd*flit_size/2 + - crossbar_outline()*Vdd*Vdd*flit_size/2)*2; +double Router::tr_crossbar_power() { + return (crossbar_inpline() * Vdd * Vdd * flit_size / 2 + + crossbar_outline() * Vdd * Vdd * flit_size / 2) * + 2; } -void Router::buffer_stats() -{ +void Router::buffer_stats() { DynamicParameter dyn_p; - dyn_p.is_tag = false; - dyn_p.pure_cam = false; + dyn_p.is_tag = false; + dyn_p.pure_cam = false; dyn_p.fully_assoc = false; - dyn_p.pure_ram = true; - dyn_p.is_dram = false; + dyn_p.pure_ram = true; + dyn_p.is_dram = false; dyn_p.is_main_mem = false; dyn_p.num_subarrays = 1; dyn_p.num_mats = 1; @@ -163,48 +141,46 @@ void Router::buffer_stats() dyn_p.num_act_mats_hor_dir = 1; dyn_p.V_b_sense = Vdd; // FIXME check power calc. 
dyn_p.ram_cell_tech_type = 0; - dyn_p.num_r_subarray = (int) vc_buffer_size; - dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; + dyn_p.num_r_subarray = (int)vc_buffer_size; + dyn_p.num_c_subarray = (int)flit_size * (int)vc_count; dyn_p.num_mats_h_dir = 1; dyn_p.num_mats_v_dir = 1; dyn_p.num_do_b_subbank = (int)flit_size; dyn_p.num_di_b_subbank = (int)flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_bank_per_port = (int) flit_size; - dyn_p.num_di_b_bank_per_port = (int) flit_size; - dyn_p.out_w = (int) flit_size; + dyn_p.num_do_b_mat = (int)flit_size; + dyn_p.num_di_b_mat = (int)flit_size; + dyn_p.num_do_b_mat = (int)flit_size; + dyn_p.num_di_b_mat = (int)flit_size; + dyn_p.num_do_b_bank_per_port = (int)flit_size; + dyn_p.num_di_b_bank_per_port = (int)flit_size; + dyn_p.out_w = (int)flit_size; dyn_p.use_inp_params = 1; - dyn_p.num_wr_ports = (unsigned int) vc_count; - dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book + dyn_p.num_wr_ports = (unsigned int)vc_count; + dyn_p.num_rd_ports = 1; //(unsigned int) vc_count;//based on Bill Dally's book dyn_p.num_rw_ports = 0; - dyn_p.num_se_rd_ports =0; - dyn_p.num_search_ports =0; - - - - dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + - dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); - dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + - (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + - dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; + dyn_p.num_se_rd_ports = 0; + dyn_p.num_search_ports = 0; + + dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * + (dyn_p.num_wr_ports + dyn_p.num_rw_ports - + 1 + dyn_p.num_rd_ports); + dyn_p.cell.w = + g_tp.sram.b_w + + 2 * g_tp.wire_outside_mat.pitch * + (dyn_p.num_rw_ports - 1 + + (dyn_p.num_rd_ports - 
dyn_p.num_se_rd_ports) + dyn_p.num_wr_ports) + + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; Mat buff(dyn_p); buff.compute_delays(0); buff.compute_power_energy(); - buffer.power.readOp = buff.power.readOp; - buffer.power.writeOp = buffer.power.readOp; //FIXME + buffer.power.readOp = buff.power.readOp; + buffer.power.writeOp = buffer.power.readOp; // FIXME buffer.area = buff.area; } - - - void -Router::cb_stats () -{ +void Router::cb_stats() { if (1) { Crossbar c_b(I, O, flit_size); c_b.compute_power(); @@ -213,20 +189,19 @@ Router::cb_stats () crossbar.power.readOp.leakage = c_b.power.readOp.leakage; crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; crossbar.area = c_b.area; -// c_b.print_crossbar(); - } - else { + // c_b.print_crossbar(); + } else { crossbar.power.readOp.dynamic = tr_crossbar_power(); - crossbar.power.readOp.leakage = flit_size * I * O * - cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); - crossbar.power.readOp.gate_leakage = flit_size * I * O * - cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); + crossbar.power.readOp.leakage = + flit_size * I * O * + cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); + crossbar.power.readOp.gate_leakage = + flit_size * I * O * + cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); } } -void -Router::get_router_power() -{ +void Router::get_router_power() { /* calculate buffer stats */ buffer_stats(); @@ -238,45 +213,40 @@ Router::get_router_power() Arbiter cbarb(I, flit_size, crossbar.area.w); vcarb.compute_power(); cbarb.compute_power(); - arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + - cbarb.power.readOp.dynamic * O; - arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + - cbarb.power.readOp.leakage * O; - arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + - cbarb.power.readOp.gate_leakage * O; - -// arb_stats(); - power.readOp.dynamic = 
((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) + - crossbar.power.readOp.dynamic + - arbiter.power.readOp.dynamic)*MIN(I, O)*M; - double pppm_t[4] = {1,I,I,1}; - power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg; - + arbiter.power.readOp.dynamic = + vcarb.power.readOp.dynamic * I + cbarb.power.readOp.dynamic * O; + arbiter.power.readOp.leakage = + vcarb.power.readOp.leakage * I + cbarb.power.readOp.leakage * O; + arbiter.power.readOp.gate_leakage = + vcarb.power.readOp.gate_leakage * I + cbarb.power.readOp.gate_leakage * O; + + // arb_stats(); + power.readOp.dynamic = + ((buffer.power.readOp.dynamic + buffer.power.writeOp.dynamic) + + crossbar.power.readOp.dynamic + arbiter.power.readOp.dynamic) * + MIN(I, O) * M; + double pppm_t[4] = {1, I, I, 1}; + power = power + + (buffer.power * pppm_t + crossbar.power + arbiter.power) * pppm_lkg; } - void -Router::get_router_delay () -{ - FREQUENCY=5; // move this to config file --TODO - cycle_time = (1/(double)FREQUENCY)*1e3; //ps +void Router::get_router_delay() { + FREQUENCY = 5; // move this to config file --TODO + cycle_time = (1 / (double)FREQUENCY) * 1e3; // ps delay = 4; - max_cyc = 17 * g_tp.FO4; //s - max_cyc *= 1e12; //ps + max_cyc = 17 * g_tp.FO4; // s + max_cyc *= 1e12; // ps if (cycle_time < max_cyc) { - FREQUENCY = (1/max_cyc)*1e3; //GHz + FREQUENCY = (1 / max_cyc) * 1e3; // GHz } } - void -Router::get_router_area() -{ - area.h = I*buffer.area.h; - area.w = buffer.area.w+crossbar.area.w; +void Router::get_router_area() { + area.h = I * buffer.area.h; + area.w = buffer.area.w + crossbar.area.w; } - void -Router::calc_router_parameters() -{ +void Router::calc_router_parameters() { /* calculate router frequency and pipeline cycles */ get_router_delay(); @@ -287,25 +257,30 @@ Router::calc_router_parameters() get_router_area(); } - void -Router::print_router() -{ +void Router::print_router() { cout << "\n\nRouter stats:\n"; - cout << "\tRouter Area - "<< 
area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n"; - cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n"; + cout << "\tRouter Area - " << area.get_area() * 1e-6 << "(mm^2)\n"; + cout << "\tMaximum possible network frequency - " << (1 / max_cyc) * 1e3 + << "GHz\n"; + cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n"; cout << "\tNo. of Virtual channels - " << vc_count << "\n"; cout << "\tNo. of pipeline stages - " << delay << endl; cout << "\tLink bandwidth - " << flit_size << " (bits)\n"; - cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n"; - cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n"; - cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n"; - cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n"; - cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n"; - cout << "\tArbiter access energy (VC arb + Crossbar arb) - "< -#include +#include "arbiter.h" #include "basic_circuit.h" #include "cacti_interface.h" #include "component.h" +#include "crossbar.h" #include "mat.h" #include "parameter.h" #include "wire.h" -#include "crossbar.h" -#include "arbiter.h" - - - -class Router : public Component -{ - public: - Router( - double flit_size_, - double vc_buf, /* vc size = vc_buffer_size * flit_size */ - double vc_count, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global), - double I_ = 5, - double O_ = 5, - double M_ = 0.6); - ~Router(); - - - void print_router(); - - Component arbiter, crossbar, buffer; - - double cycle_time, max_cyc; - double flit_size; - double vc_count; - double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */ - - private: - 
TechnologyParameter::DeviceType *deviceType; - double FREQUENCY; // move this to config file --TODO - double Cw3(double len); - double gate_cap(double w); - double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack); - enum Wire_type wtype; - enum Wire_placement wire_placement; - //corssbar - double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2; - double M; //network load - double transmission_buf_inpcap(); - double transmission_buf_outcap(); - double transmission_buf_ctrcap(); - double crossbar_inpline(); - double crossbar_outline(); - double crossbar_ctrline(); - double tr_crossbar_power(); - void cb_stats (); - double arb_power(); - void arb_stats (); - double buffer_params(); - void buffer_stats(); - - - //arbiter - - //buffer - - //router params - double Vdd; - - void calc_router_parameters(); - void get_router_area(); - void get_router_power(); - void get_router_delay(); - - double min_w_pmos; +#include +#include +class Router : public Component { +public: + Router(double flit_size_, + double vc_buf, /* vc size = vc_buffer_size * flit_size */ + double vc_count, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global), + double I_ = 5, double O_ = 5, double M_ = 0.6); + ~Router(); + + void print_router(); + + Component arbiter, crossbar, buffer; + + double cycle_time, max_cyc; + double flit_size; + double vc_count; + double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */ + +private: + TechnologyParameter::DeviceType *deviceType; + double FREQUENCY; // move this to config file --TODO + double Cw3(double len); + double gate_cap(double w); + double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, + double stack); + enum Wire_type wtype; + enum Wire_placement wire_placement; + // corssbar + double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, + TriS2; + double M; // network load + double transmission_buf_inpcap(); + double transmission_buf_outcap(); + double 
transmission_buf_ctrcap(); + double crossbar_inpline(); + double crossbar_outline(); + double crossbar_ctrline(); + double tr_crossbar_power(); + void cb_stats(); + double arb_power(); + void arb_stats(); + double buffer_params(); + void buffer_stats(); + + // arbiter + + // buffer + + // router params + double Vdd; + + void calc_router_parameters(); + void get_router_area(); + void get_router_power(); + void get_router_delay(); + + double min_w_pmos; }; #endif diff --git a/cacti/subarray.cc b/cacti/subarray.cc old mode 100755 new mode 100644 index ef5737d..b262ec0 --- a/cacti/subarray.cc +++ b/cacti/subarray.cc @@ -29,169 +29,183 @@ * ***************************************************************************/ +#include "subarray.h" - - +#include #include #include -#include -#include "subarray.h" - - -Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_): - dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray), - num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray), - cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) -{ - //num_cols=7; - //cout<<"num_cols ="<< num_cols < #include "basic_circuit.h" - #include "parameter.h" -double wire_resistance(double resistivity, double wire_width, double wire_thickness, - double barrier_thickness, double dishing_thickness, double alpha_scatter) -{ +#include + +double wire_resistance(double resistivity, double wire_width, + double wire_thickness, double barrier_thickness, + double dishing_thickness, double alpha_scatter) { double resistance; - resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness)); - return(resistance); + resistance = alpha_scatter * resistivity / + ((wire_thickness - barrier_thickness - dishing_thickness) * + (wire_width - 2 * barrier_thickness)); + return (resistance); } -double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing, - double ild_thickness, 
double miller_value, double horiz_dielectric_constant, - double vert_dielectric_constant, double fringe_cap) -{ +double wire_capacitance(double wire_width, double wire_thickness, + double wire_spacing, double ild_thickness, + double miller_value, double horiz_dielectric_constant, + double vert_dielectric_constant, double fringe_cap) { double vertical_cap, sidewall_cap, total_cap; - vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness; - sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing; + vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * + wire_width / ild_thickness; + sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * + horiz_dielectric_constant * wire_thickness / wire_spacing; total_cap = vertical_cap + sidewall_cap + fringe_cap; - return(total_cap); + return (total_cap); } - -void init_tech_params(double technology, bool is_tag) -{ - int iter, tech, tech_lo, tech_hi; +void init_tech_params(double technology, bool is_tag) { + int iter, tech, tech_lo, tech_hi; double curr_alpha, curr_vpp; - double wire_width, wire_thickness, wire_spacing, - fringe_cap, pmos_to_nmos_sizing_r; -// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant; + double wire_width, wire_thickness, wire_spacing, fringe_cap, + pmos_to_nmos_sizing_r; + // double aspect_ratio,ild_thickness, miller_value = 1.5, + // horiz_dielectric_constant, vert_dielectric_constant; double barrier_thickness, dishing_thickness, alpha_scatter; - double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell; + double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, + curr_I_on_dram_cell, curr_c_dram_cell; - uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - uint32_t peri_global_tech_type = (is_tag) ? 
g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; + uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type + : g_ip->data_arr_ram_cell_tech_type; + uint32_t peri_global_tech_type = (is_tag) + ? g_ip->tag_arr_peri_global_tech_type + : g_ip->data_arr_peri_global_tech_type; - technology = technology * 1000.0; // in the unit of nm + technology = technology * 1000.0; // in the unit of nm // initialize parameters g_tp.reset(); double gmp_to_gmn_multiplier_periph_global = 0; double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, - curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, - curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, - curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; - double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data - curr_asp_ratio_cell_cam; + curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, + curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, + curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; + double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, + curr_area_cell_cam, // Sheng: CAM data + curr_asp_ratio_cell_cam; double SENSE_AMP_D, SENSE_AMP_P; // J double area_cell_dram = 0; double asp_ratio_cell_dram = 0; @@ -91,91 +99,76 @@ void init_tech_params(double technology, bool is_tag) double nmos_effective_resistance_multiplier; double width_dram_access_transistor; - double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date - double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn + double curr_logic_scaling_co_eff = + 0; // This is based on the reported numbers of Intel Merom 65nm, + // Penryn45nm and IBM cell 90/65/45 date + double curr_core_tx_density = + 0; // this is density per 
um^2; 90, ...22nm based on Intel Penryn double curr_chip_layout_overhead = 0; double curr_macro_layout_overhead = 0; double curr_sckt_co_eff = 0; - if (technology < 181 && technology > 179) - { - tech_lo = 180; - tech_hi = 180; - } - else if (technology < 91 && technology > 89) - { + if (technology < 181 && technology > 179) { + tech_lo = 180; + tech_hi = 180; + } else if (technology < 91 && technology > 89) { tech_lo = 90; tech_hi = 90; - } - else if (technology < 66 && technology > 64) - { + } else if (technology < 66 && technology > 64) { tech_lo = 65; tech_hi = 65; - } - else if (technology < 46 && technology > 44) - { + } else if (technology < 46 && technology > 44) { tech_lo = 45; tech_hi = 45; - } - else if (technology < 33 && technology > 31) - { + } else if (technology < 33 && technology > 31) { tech_lo = 32; tech_hi = 32; - } - else if (technology < 23 && technology > 21) - { + } else if (technology < 23 && technology > 21) { tech_lo = 22; tech_hi = 22; - if (ram_cell_tech_type == 3 ) - { - cout<<"current version does not support eDRAM technologies at 22nm"< 15) -// { -// tech_lo = 16; -// tech_hi = 16; -// } - else if (technology < 180 && technology > 90) - { - tech_lo = 180; - tech_hi = 90; - } - else if (technology < 90 && technology > 65) - { + // else if (technology < 17 && technology > 15) + // { + // tech_lo = 16; + // tech_hi = 16; + // } + else if (technology < 180 && technology > 90) { + tech_lo = 180; + tech_hi = 90; + } else if (technology < 90 && technology > 65) { tech_lo = 90; tech_hi = 65; - } - else if (technology < 65 && technology > 45) - { + } else if (technology < 65 && technology > 45) { tech_lo = 65; tech_hi = 45; - } - else if (technology < 45 && technology > 32) - { + } else if (technology < 45 && technology > 32) { tech_lo = 45; tech_hi = 32; + } else if (technology < 32 && technology > 22) { + tech_lo = 32; + tech_hi = 22; + } + // else if (technology < 22 && technology > 16) + // { + // tech_lo = 22; + // tech_hi = 16; + // } + 
else { + cout << "Invalid technology nodes" << endl; + exit(0); } - else if (technology < 32 && technology > 22) - { - tech_lo = 32; - tech_hi = 22; - } -// else if (technology < 22 && technology > 16) -// { -// tech_lo = 22; -// tech_hi = 16; -// } - else - { - cout<<"Invalid technology nodes"<specific_hp_vdd ? g_ip->hp_Vdd : vdd[0]; - alpha_power_law[0]=1.4; - Lphy[0] = 0.12;//Lphy is the physical gate-length. micron - Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron - t_ox[0] = 1.2e-3*(Aggre_proj? 1.9/1.2:2);//micron - v_th[0] = Aggre_proj? 0.36 : 0.4407;//V - c_ox[0] = 1.79e-14*(Aggre_proj? 1.9/1.2:2);//F/micron2 - mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 0.128*2; //V - c_g_ideal[0] = (Aggre_proj? 1.9/1.2:2)*6.64e-16;//F/micron - c_fringe[0] = (Aggre_proj? 1.9/1.2:2)*0.08e-15;//F/micron - c_junc[0] = (Aggre_proj? 1.9/1.2:2)*1e-15;//F/micron2 - I_on_n[0] = 750e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron - I_on_p[0] = 350e-6;//A/micron - //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline + alpha_power_law[0] = 1.4; + Lphy[0] = 0.12; // Lphy is the physical gate-length. micron + Lelec[0] = 0.10; // Lelec is the electrical gate-length. micron + t_ox[0] = 1.2e-3 * (Aggre_proj ? 1.9 / 1.2 : 2); // micron + v_th[0] = Aggre_proj ? 0.36 : 0.4407; // V + c_ox[0] = 1.79e-14 * (Aggre_proj ? 1.9 / 1.2 : 2); // F/micron2 + mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[0] = 0.128 * 2; // V + c_g_ideal[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 6.64e-16; // F/micron + c_fringe[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 0.08e-15; // F/micron + c_junc[0] = (Aggre_proj ? 
1.9 / 1.2 : 2) * 1e-15; // F/micron2 + I_on_n[0] = 750e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); // A/micron + I_on_p[0] = 350e-6; // A/micron + // Note that nmos_effective_resistance_multiplier, + // n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are + // calculated offline nmos_effective_resistance_multiplier = 1.54; n_to_p_eff_curr_drv_ratio[0] = 2.45; gmp_to_gmn_multiplier[0] = 1.22; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / + I_on_n[0]; // ohm-micron + Rpchannelon[0] = + n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; // ohm-micron long_channel_leakage_reduction[0] = 1; - I_off_n[0][0] = 7e-10;//A/micron + I_off_n[0][0] = 7e-10; // A/micron I_off_n[0][10] = 8.26e-10; I_off_n[0][20] = 9.74e-10; I_off_n[0][30] = 1.15e-9; @@ -269,7 +257,7 @@ void init_tech_params(double technology, bool is_tag) I_off_n[0][90] = 3.19e-9; I_off_n[0][100] = 3.76e-9; - I_g_on_n[0][0] = 1.65e-10;//A/micron + I_g_on_n[0][0] = 1.65e-10; // A/micron I_g_on_n[0][10] = 1.65e-10; I_g_on_n[0][20] = 1.65e-10; I_g_on_n[0][30] = 1.65e-10; @@ -281,68 +269,75 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[0][90] = 1.65e-10; I_g_on_n[0][100] = 1.65e-10; - //SRAM cell properties + // SRAM cell properties curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited + // CAM cell properties //TODO: data need to be revisited curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - 
curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm - curr_core_tx_density = 1.25*0.7*0.7*0.4; - curr_sckt_co_eff = 1.11; - curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb - + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; // 360 + curr_asp_ratio_cell_cam = 2.92; // 2.5 + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1.5; // linear scaling from 90nm + curr_core_tx_density = 1.25 * 0.7 * 0.7 * 0.4; + curr_sckt_co_eff = 1.11; + curr_chip_layout_overhead = + 1.0; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.0; // EDA placement and routing tool rule of thumb } - if (tech == 90) - { - SENSE_AMP_D = .28e-9; // s + if (tech == 90) { + SENSE_AMP_D = .28e-9; // s SENSE_AMP_P = 14.7e-15; // J - //90nm technology-node. Corresponds to year 2004 in ITRS - //ITRS HP device type - vdd[0] = 1.2; + // 90nm technology-node. Corresponds to year 2004 in ITRS + // ITRS HP device type + vdd[0] = 1.2; vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0]; - alpha_power_law[0]=1.34; - Lphy[0] = 0.037;//Lphy is the physical gate-length. micron - Lelec[0] = 0.0266;//Lelec is the electrical gate-length. 
micron - t_ox[0] = 1.2e-3;//micron - v_th[0] = 0.23707;//V - c_ox[0] = 1.79e-14;//F/micron2 - mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 0.128; //V - c_g_ideal[0] = 6.64e-16;//F/micron - c_fringe[0] = 0.08e-15;//F/micron - c_junc[0] = 1e-15;//F/micron2 - I_on_n[0] = 1076.9e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron with ap-law applied for dvs and arbitrary vdd - I_on_p[0] = 712.6e-6;//A/micron - //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline + alpha_power_law[0] = 1.34; + Lphy[0] = 0.037; // Lphy is the physical gate-length. micron + Lelec[0] = 0.0266; // Lelec is the electrical gate-length. micron + t_ox[0] = 1.2e-3; // micron + v_th[0] = 0.23707; // V + c_ox[0] = 1.79e-14; // F/micron2 + mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[0] = 0.128; // V + c_g_ideal[0] = 6.64e-16; // F/micron + c_fringe[0] = 0.08e-15; // F/micron + c_junc[0] = 1e-15; // F/micron2 + I_on_n[0] = + 1076.9e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); // A/micron with ap-law applied + // for dvs and arbitrary vdd + I_on_p[0] = 712.6e-6; // A/micron + // Note that nmos_effective_resistance_multiplier, + // n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are + // calculated offline nmos_effective_resistance_multiplier = 1.54; n_to_p_eff_curr_drv_ratio[0] = 2.45; gmp_to_gmn_multiplier[0] = 1.22; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / + I_on_n[0]; // ohm-micron + Rpchannelon[0] = + n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; // ohm-micron long_channel_leakage_reduction[0] = 1; - I_off_n[0][0] = 3.24e-8*pow(vdd_real[0]/(vdd[0]),4);//A/micron - I_off_n[0][10] = 
4.01e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][20] = 4.90e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][30] = 5.92e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][40] = 7.08e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][50] = 8.38e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][60] = 9.82e-8*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][70] = 1.14e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][80] = 1.29e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][90] = 1.43e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][100] = 1.54e-7*pow(vdd_real[0]/(vdd[0]),4); - - I_g_on_n[0][0] = 1.65e-8;//A/micron + I_off_n[0][0] = 3.24e-8 * pow(vdd_real[0] / (vdd[0]), 4); // A/micron + I_off_n[0][10] = 4.01e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][20] = 4.90e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][30] = 5.92e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][40] = 7.08e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][50] = 8.38e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][60] = 9.82e-8 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][70] = 1.14e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][80] = 1.29e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][90] = 1.43e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][100] = 1.54e-7 * pow(vdd_real[0] / (vdd[0]), 4); + + I_g_on_n[0][0] = 1.65e-8; // A/micron I_g_on_n[0][10] = 1.65e-8; I_g_on_n[0][20] = 1.65e-8; I_g_on_n[0][30] = 1.65e-8; @@ -354,41 +349,43 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[0][90] = 1.65e-8; I_g_on_n[0][100] = 1.65e-8; - //ITRS LSTP device type - vdd[1] = 1.3; + // ITRS LSTP device type + vdd[1] = 1.3; vdd_real[1] = g_ip->specific_lstp_vdd ? 
g_ip->lstp_Vdd : vdd[1]; - alpha_power_law[1]=1.47; - Lphy[1] = 0.075; + alpha_power_law[1] = 1.47; + Lphy[1] = 0.075; Lelec[1] = 0.0486; - t_ox[1] = 2.2e-3; - v_th[1] = 0.48203; - c_ox[1] = 1.22e-14; + t_ox[1] = 2.2e-3; + v_th[1] = 0.48203; + c_ox[1] = 1.22e-14; mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[1] = 0.373; c_g_ideal[1] = 9.15e-16; - c_fringe[1] = 0.08e-15; + c_fringe[1] = 0.08e-15; c_junc[1] = 1e-15; - I_on_n[1] = 503.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]); + I_on_n[1] = 503.6e-6 * pow((vdd_real[1] - v_th[1]) / (vdd[1] - v_th[1]), + alpha_power_law[1]); I_on_p[1] = 235.1e-6; nmos_effective_resistance_multiplier = 1.92; n_to_p_eff_curr_drv_ratio[1] = 2.44; - gmp_to_gmn_multiplier[1] =0.88; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; + gmp_to_gmn_multiplier[1] = 0.88; + Rnchannelon[1] = + nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; long_channel_leakage_reduction[1] = 1; - I_off_n[1][0] = 2.81e-12*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][10] = 4.76e-12*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][20] = 7.82e-12*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][30] = 1.25e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][40] = 1.94e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][50] = 2.94e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][60] = 4.36e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][70] = 6.32e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][80] = 8.95e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][90] = 1.25e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][100] = 1.7e-10*pow(vdd_real[1]/(vdd[1]),4); - - I_g_on_n[1][0] = 3.87e-11;//A/micron + I_off_n[1][0] = 2.81e-12 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][10] = 4.76e-12 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][20] = 7.82e-12 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][30] = 1.25e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][40] = 
1.94e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][50] = 2.94e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][60] = 4.36e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][70] = 6.32e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][80] = 8.95e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][90] = 1.25e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][100] = 1.7e-10 * pow(vdd_real[1] / (vdd[1]), 4); + + I_g_on_n[1][0] = 3.87e-11; // A/micron I_g_on_n[1][10] = 3.87e-11; I_g_on_n[1][20] = 3.87e-11; I_g_on_n[1][30] = 3.87e-11; @@ -400,10 +397,10 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[1][90] = 3.87e-11; I_g_on_n[1][100] = 3.87e-11; - //ITRS LOP device type + // ITRS LOP device type vdd[2] = 0.9; vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2]; - alpha_power_law[2]=1.55; + alpha_power_law[2] = 1.55; Lphy[2] = 0.053; Lelec[2] = 0.0354; t_ox[2] = 1.5e-3; @@ -414,27 +411,29 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[2] = 8.45e-16; c_fringe[2] = 0.08e-15; c_junc[2] = 1e-15; - I_on_n[2] = 386.6e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]); + I_on_n[2] = 386.6e-6 * pow((vdd_real[2] - v_th[2]) / (vdd[2] - v_th[2]), + alpha_power_law[2]); I_on_p[2] = 209.7e-6; nmos_effective_resistance_multiplier = 1.77; n_to_p_eff_curr_drv_ratio[2] = 2.54; gmp_to_gmn_multiplier[2] = 0.98; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; + Rnchannelon[2] = + nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; long_channel_leakage_reduction[2] = 1; - I_off_n[2][0] = 2.14e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][10] = 2.9e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][20] = 3.87e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][30] = 5.07e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][40] = 6.54e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][50] = 8.27e-8*pow(vdd_real[2]/(vdd[2]),5); - 
I_off_n[2][60] = 1.02e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][70] = 1.20e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][80] = 1.36e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][90] = 1.52e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][100] = 1.73e-8*pow(vdd_real[2]/(vdd[2]),5); - - I_g_on_n[2][0] = 4.31e-8;//A/micron + I_off_n[2][0] = 2.14e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][10] = 2.9e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][20] = 3.87e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][30] = 5.07e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][40] = 6.54e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][50] = 8.27e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][60] = 1.02e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][70] = 1.20e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][80] = 1.36e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][90] = 1.52e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][100] = 1.73e-8 * pow(vdd_real[2] / (vdd[2]), 5); + + I_g_on_n[2][0] = 4.31e-8; // A/micron I_g_on_n[2][10] = 4.31e-8; I_g_on_n[2][20] = 4.31e-8; I_g_on_n[2][30] = 4.31e-8; @@ -446,9 +445,8 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[2][90] = 4.31e-8; I_g_on_n[2][100] = 4.31e-8; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters + if (ram_cell_tech_type == lp_dram) { + // LP-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.2; Lphy[3] = 0.12; Lelec[3] = 0.0756; @@ -463,12 +461,12 @@ void init_tech_params(double technology, bool is_tag) curr_asp_ratio_cell_dram = 1.46; curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters + // LP-DRAM wordline transistor parameters curr_vpp = 1.6; t_ox[3] = 2.2e-3; v_th[3] = 0.4545; c_ox[3] = 1.22e-14; - mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.3; c_g_ideal[3] = 1.47e-15; c_fringe[3] = 0.08e-15; @@ -478,7 +476,8 @@ void 
init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.65; n_to_p_eff_curr_drv_ratio[3] = 1.95; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; I_off_n[3][0] = 1.42e-11; @@ -492,10 +491,8 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][80] = 2.57e-10; I_off_n[3][90] = 3.14e-10; I_off_n[3][100] = 3.85e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters + } else if (ram_cell_tech_type == comm_dram) { + // COMM-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.6; Lphy[3] = 0.09; Lelec[3] = 0.0576; @@ -506,16 +503,16 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.09*0.09; + curr_area_cell_dram = 6 * 0.09 * 0.09; curr_asp_ratio_cell_dram = 1.5; curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters + // COMM-DRAM wordline transistor parameters curr_vpp = 3.7; t_ox[3] = 5.5e-3; v_th[3] = 1.0; c_ox[3] = 5.65e-15; - mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.32; c_g_ideal[3] = 5.08e-16; c_fringe[3] = 0.08e-15; @@ -525,7 +522,8 @@ void init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.62; n_to_p_eff_curr_drv_ratio[3] = 2.05; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 
1; I_off_n[3][0] = 5.80e-15; @@ -541,36 +539,35 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][100] = 1.67e-12; } - //SRAM cell properties + // SRAM cell properties curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited + // CAM cell properties //TODO: data need to be revisited curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1; - curr_core_tx_density = 1.25*0.7*0.7; - curr_sckt_co_eff = 1.1539; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - - + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; // 360 + curr_asp_ratio_cell_cam = 2.92; // 2.5 + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1; + curr_core_tx_density = 1.25 * 0.7 * 0.7; + curr_sckt_co_eff = 1.1539; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA placement and routing tool rule of thumb } - if (tech == 65) - { //65nm technology-node. Corresponds to year 2007 in ITRS - //ITRS HP device type - SENSE_AMP_D = .2e-9; // s + if (tech == 65) { // 65nm technology-node. Corresponds to year 2007 in ITRS + // ITRS HP device type + SENSE_AMP_D = .2e-9; // s SENSE_AMP_P = 5.7e-15; // J vdd[0] = 1.1; vdd_real[0] = g_ip->specific_hp_vdd ? 
g_ip->hp_Vdd : vdd[0]; - alpha_power_law[0]=1.27; + alpha_power_law[0] = 1.27; Lphy[0] = 0.025; Lelec[0] = 0.019; t_ox[0] = 1.1e-3; @@ -581,28 +578,31 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[0] = 4.69e-16; c_fringe[0] = 0.077e-15; c_junc[0] = 1e-15; - I_on_n[0] = 1197.2e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]); + I_on_n[0] = 1197.2e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); I_on_p[0] = 870.8e-6; nmos_effective_resistance_multiplier = 1.50; n_to_p_eff_curr_drv_ratio[0] = 2.41; gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0]; + Rnchannelon[0] = + nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0]; Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.74; - //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first - //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. - I_off_n[0][0] = 1.96e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][10] = 2.29e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][20] = 2.66e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][30] = 3.05e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][40] = 3.49e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][50] = 3.95e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][60] = 4.45e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][70] = 4.97e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][80] = 5.48e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][90] = 5.94e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][100] = 6.3e-7*pow(vdd_real[0]/(vdd[0]),4); - I_g_on_n[0][0] = 4.09e-8;//A/micron + long_channel_leakage_reduction[0] = 1 / 3.74; + // Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate + // increase by 10%, whichever comes first Ioff(Lgate normal)/Ioff(Lgate + // long)= 3.74. 
+ I_off_n[0][0] = 1.96e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][10] = 2.29e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][20] = 2.66e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][30] = 3.05e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][40] = 3.49e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][50] = 3.95e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][60] = 4.45e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][70] = 4.97e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][80] = 5.48e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][90] = 5.94e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][100] = 6.3e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_g_on_n[0][0] = 4.09e-8; // A/micron I_g_on_n[0][10] = 4.09e-8; I_g_on_n[0][20] = 4.09e-8; I_g_on_n[0][30] = 4.09e-8; @@ -614,10 +614,10 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[0][90] = 4.09e-8; I_g_on_n[0][100] = 4.09e-8; - //ITRS LSTP device type + // ITRS LSTP device type vdd[1] = 1.2; - vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];//TODO - alpha_power_law[1]=1.40; + vdd_real[1] = g_ip->specific_lstp_vdd ? 
g_ip->lstp_Vdd : vdd[1]; // TODO + alpha_power_law[1] = 1.40; Lphy[1] = 0.045; Lelec[1] = 0.0298; t_ox[1] = 1.9e-3; @@ -628,27 +628,29 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[1] = 6.14e-16; c_fringe[1] = 0.08e-15; c_junc[1] = 1e-15; - I_on_n[1] = 519.2e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]); + I_on_n[1] = 519.2e-6 * pow((vdd_real[1] - v_th[1]) / (vdd[1] - v_th[1]), + alpha_power_law[1]); I_on_p[1] = 266e-6; nmos_effective_resistance_multiplier = 1.96; n_to_p_eff_curr_drv_ratio[1] = 2.23; gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; + Rnchannelon[1] = + nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.82; - I_off_n[1][0] = 9.12e-12*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][10] = 1.49e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][20] = 2.36e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][30] = 3.64e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][40] = 5.48e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][50] = 8.05e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][60] = 1.15e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][70] = 1.59e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][80] = 2.1e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][90] = 2.62e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][100] = 3.21e-10*pow(vdd_real[1]/(vdd[1]),4); - - I_g_on_n[1][0] = 1.09e-10;//A/micron + long_channel_leakage_reduction[1] = 1 / 2.82; + I_off_n[1][0] = 9.12e-12 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][10] = 1.49e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][20] = 2.36e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][30] = 3.64e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][40] = 5.48e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][50] = 8.05e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][60] = 1.15e-10 * pow(vdd_real[1] / 
(vdd[1]), 4); + I_off_n[1][70] = 1.59e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][80] = 2.1e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][90] = 2.62e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][100] = 3.21e-10 * pow(vdd_real[1] / (vdd[1]), 4); + + I_g_on_n[1][0] = 1.09e-10; // A/micron I_g_on_n[1][10] = 1.09e-10; I_g_on_n[1][20] = 1.09e-10; I_g_on_n[1][30] = 1.09e-10; @@ -660,9 +662,9 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[1][90] = 1.09e-10; I_g_on_n[1][100] = 1.09e-10; - //ITRS LOP device type + // ITRS LOP device type vdd[2] = 0.8; - alpha_power_law[2]=1.43; + alpha_power_law[2] = 1.43; vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2]; Lphy[2] = 0.032; Lelec[2] = 0.0216; @@ -674,27 +676,29 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[2] = 6e-16; c_fringe[2] = 0.08e-15; c_junc[2] = 1e-15; - I_on_n[2] = 573.1e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]); + I_on_n[2] = 573.1e-6 * pow((vdd_real[2] - v_th[2]) / (vdd[2] - v_th[2]), + alpha_power_law[2]); I_on_p[2] = 340.6e-6; nmos_effective_resistance_multiplier = 1.82; n_to_p_eff_curr_drv_ratio[2] = 2.28; gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; + Rnchannelon[2] = + nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/2.05; - I_off_n[2][0] = 4.9e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][10] = 6.49e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][20] = 8.45e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][30] = 1.08e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][40] = 1.37e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][50] = 1.71e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][60] = 2.09e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][70] = 2.48e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][80] = 2.84e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][90] 
= 3.13e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][100] = 3.42e-8*pow(vdd_real[2]/(vdd[2]),5); - - I_g_on_n[2][0] = 9.61e-9;//A/micron + long_channel_leakage_reduction[2] = 1 / 2.05; + I_off_n[2][0] = 4.9e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][10] = 6.49e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][20] = 8.45e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][30] = 1.08e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][40] = 1.37e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][50] = 1.71e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][60] = 2.09e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][70] = 2.48e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][80] = 2.84e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][90] = 3.13e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][100] = 3.42e-8 * pow(vdd_real[2] / (vdd[2]), 5); + + I_g_on_n[2][0] = 9.61e-9; // A/micron I_g_on_n[2][10] = 9.61e-9; I_g_on_n[2][20] = 9.61e-9; I_g_on_n[2][30] = 9.61e-9; @@ -706,9 +710,8 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[2][90] = 9.61e-9; I_g_on_n[2][100] = 9.61e-9; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters + if (ram_cell_tech_type == lp_dram) { + // LP-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.2; Lphy[3] = 0.12; Lelec[3] = 0.0756; @@ -723,25 +726,26 @@ void init_tech_params(double technology, bool is_tag) curr_asp_ratio_cell_dram = 1.46; curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters + // LP-DRAM wordline transistor parameters curr_vpp = 1.6; t_ox[3] = 2.2e-3; v_th[3] = 0.43806; c_ox[3] = 1.22e-14; - mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.43806; c_g_ideal[3] = 1.46e-15; c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; + c_junc[3] = 1e-15; I_on_n[3] = 399.8e-6; I_on_p[3] = 243.4e-6; nmos_effective_resistance_multiplier = 1.65; n_to_p_eff_curr_drv_ratio[3] 
= 2.05; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.23e-11; + I_off_n[3][0] = 2.23e-11; I_off_n[3][10] = 3.46e-11; I_off_n[3][20] = 5.24e-11; I_off_n[3][30] = 7.75e-11; @@ -752,10 +756,8 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][80] = 3.63e-10; I_off_n[3][90] = 4.41e-10; I_off_n[3][100] = 5.36e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters + } else if (ram_cell_tech_type == comm_dram) { + // COMM-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.3; Lphy[3] = 0.065; Lelec[3] = 0.0426; @@ -766,29 +768,30 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.065*0.065; + curr_area_cell_dram = 6 * 0.065 * 0.065; curr_asp_ratio_cell_dram = 1.5; curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters + // COMM-DRAM wordline transistor parameters curr_vpp = 3.3; t_ox[3] = 5e-3; v_th[3] = 1.0; c_ox[3] = 6.16e-15; - mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.385; c_g_ideal[3] = 4e-16; c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; + c_junc[3] = 1e-15; I_on_n[3] = 1031e-6; I_on_p[3] = I_on_n[3] / 2; nmos_effective_resistance_multiplier = 1.69; n_to_p_eff_curr_drv_ratio[3] = 2.39; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; 
long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.80e-14; + I_off_n[3][0] = 1.80e-14; I_off_n[3][10] = 3.64e-14; I_off_n[3][20] = 7.03e-14; I_off_n[3][30] = 1.31e-13; @@ -801,34 +804,37 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][100] = 3.99e-12; } - //SRAM cell properties + // SRAM cell properties curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited + // CAM cell properties //TODO: data need to be revisited curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor - curr_core_tx_density = 1.25*0.7; - curr_sckt_co_eff = 1.1359; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = + 0.7; // Rather than scale proportionally to square of feature size, + // only scale linearly according to IBM cell processor + curr_core_tx_density = 1.25 * 0.7; + curr_sckt_co_eff = 1.1359; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA placement and routing tool rule of thumb } - if (tech == 45) - { //45nm technology-node. Corresponds to year 2010 in ITRS - //ITRS HP device type - SENSE_AMP_D = .04e-9; // s + if (tech == 45) { // 45nm technology-node. 
Corresponds to year 2010 in ITRS + // ITRS HP device type + SENSE_AMP_D = .04e-9; // s SENSE_AMP_P = 2.7e-15; // J vdd[0] = 1.0; - vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];//TODO - alpha_power_law[0]=1.21; + vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0]; // TODO + alpha_power_law[0] = 1.21; Lphy[0] = 0.018; Lelec[0] = 0.01345; t_ox[0] = 0.65e-3; @@ -839,29 +845,35 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[0] = 6.78e-16; c_fringe[0] = 0.05e-15; c_junc[0] = 1e-15; - I_on_n[0] = 2046.6e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]); - //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of - //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm - I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI + I_on_n[0] = 2046.6e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); + // There are certain problems with the ITRS PMOS numbers in MASTAR for + // 45nm. 
So we are using 65nm values of n_to_p_eff_curr_drv_ratio and + // gmp_to_gmn_multiplier for 45nm + I_on_p[0] = I_on_n[0] / 2; // This value is fixed arbitrarily but I_on_p + // is not being used in CACTI nmos_effective_resistance_multiplier = 1.51; n_to_p_eff_curr_drv_ratio[0] = 2.41; gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0]; + Rnchannelon[0] = + nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0]; Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 - I_off_n[0][0] = 2.8e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][10] = 3.28e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][20] = 3.81e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][30] = 4.39e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][40] = 5.02e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][50] = 5.69e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][60] = 6.42e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][70] = 7.2e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][80] = 8.03e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][90] = 8.91e-7*pow(vdd_real[0]/(vdd[0]),4); - I_off_n[0][100] = 9.84e-7*pow(vdd_real[0]/(vdd[0]),4); - - I_g_on_n[0][0] = 3.59e-8;//A/micron + long_channel_leakage_reduction[0] = + 1 / 3.546; // Using MASTAR, @380K, increase Lgate until Ion reduces to + // 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 + I_off_n[0][0] = 2.8e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][10] = 3.28e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][20] = 3.81e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][30] = 4.39e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][40] = 5.02e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][50] = 5.69e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][60] = 6.42e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][70] = 7.2e-7 * pow(vdd_real[0] / 
(vdd[0]), 4); + I_off_n[0][80] = 8.03e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][90] = 8.91e-7 * pow(vdd_real[0] / (vdd[0]), 4); + I_off_n[0][100] = 9.84e-7 * pow(vdd_real[0] / (vdd[0]), 4); + + I_g_on_n[0][0] = 3.59e-8; // A/micron I_g_on_n[0][10] = 3.59e-8; I_g_on_n[0][20] = 3.59e-8; I_g_on_n[0][30] = 3.59e-8; @@ -873,41 +885,43 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[0][90] = 3.59e-8; I_g_on_n[0][100] = 3.59e-8; - //ITRS LSTP device type + // ITRS LSTP device type vdd[1] = 1.1; vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1]; - alpha_power_law[1]=1.33; - Lphy[1] = 0.028; + alpha_power_law[1] = 1.33; + Lphy[1] = 0.028; Lelec[1] = 0.0212; t_ox[1] = 1.4e-3; v_th[1] = 0.50245; c_ox[1] = 2.01e-14; - mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[1] = 9.12e-2; c_g_ideal[1] = 5.18e-16; c_fringe[1] = 0.08e-15; c_junc[1] = 1e-15; - I_on_n[1] = 666.2e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]); + I_on_n[1] = 666.2e-6 * pow((vdd_real[1] - v_th[1]) / (vdd[1] - v_th[1]), + alpha_power_law[1]); I_on_p[1] = I_on_n[1] / 2; nmos_effective_resistance_multiplier = 1.99; n_to_p_eff_curr_drv_ratio[1] = 2.23; gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; + Rnchannelon[1] = + nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.08; - I_off_n[1][0] = 1.01e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][10] = 1.65e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][20] = 2.62e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][30] = 4.06e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][40] = 6.12e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][50] = 9.02e-11*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][60] = 1.3e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][70] = 
1.83e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][80] = 2.51e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][90] = 3.29e-10*pow(vdd_real[1]/(vdd[1]),4); - I_off_n[1][100] = 4.1e-10*pow(vdd_real[1]/(vdd[1]),4); - - I_g_on_n[1][0] = 9.47e-12;//A/micron + long_channel_leakage_reduction[1] = 1 / 2.08; + I_off_n[1][0] = 1.01e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][10] = 1.65e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][20] = 2.62e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][30] = 4.06e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][40] = 6.12e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][50] = 9.02e-11 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][60] = 1.3e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][70] = 1.83e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][80] = 2.51e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][90] = 3.29e-10 * pow(vdd_real[1] / (vdd[1]), 4); + I_off_n[1][100] = 4.1e-10 * pow(vdd_real[1] / (vdd[1]), 4); + + I_g_on_n[1][0] = 9.47e-12; // A/micron I_g_on_n[1][10] = 9.47e-12; I_g_on_n[1][20] = 9.47e-12; I_g_on_n[1][30] = 9.47e-12; @@ -919,41 +933,43 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[1][90] = 9.47e-12; I_g_on_n[1][100] = 9.47e-12; - //ITRS LOP device type + // ITRS LOP device type vdd[2] = 0.7; - vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO - alpha_power_law[2]=1.39; + vdd_real[2] = g_ip->specific_lop_vdd ? 
g_ip->lop_Vdd : vdd[2]; // TODO + alpha_power_law[2] = 1.39; Lphy[2] = 0.022; Lelec[2] = 0.016; t_ox[2] = 0.9e-3; v_th[2] = 0.22599; - c_ox[2] = 2.82e-14;//F/micron2 + c_ox[2] = 2.82e-14; // F/micron2 mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[2] = 5.71e-2; c_g_ideal[2] = 6.2e-16; c_fringe[2] = 0.073e-15; c_junc[2] = 1e-15; - I_on_n[2] = 748.9e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]); + I_on_n[2] = 748.9e-6 * pow((vdd_real[2] - v_th[2]) / (vdd[2] - v_th[2]), + alpha_power_law[2]); I_on_p[2] = I_on_n[2] / 2; nmos_effective_resistance_multiplier = 1.76; n_to_p_eff_curr_drv_ratio[2] = 2.28; gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; + Rnchannelon[2] = + nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.92; - I_off_n[2][0] = 4.03e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][10] = 5.02e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][20] = 6.18e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][30] = 7.51e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][40] = 9.04e-9*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][50] = 1.08e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][60] = 1.27e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][70] = 1.47e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][80] = 1.66e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][90] = 1.84e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][100] = 2.03e-8*pow(vdd_real[2]/(vdd[2]),5); - - I_g_on_n[2][0] = 3.24e-8;//A/micron + long_channel_leakage_reduction[2] = 1 / 1.92; + I_off_n[2][0] = 4.03e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][10] = 5.02e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][20] = 6.18e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][30] = 7.51e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][40] = 9.04e-9 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][50] = 1.08e-8 * pow(vdd_real[2] 
/ (vdd[2]), 5); + I_off_n[2][60] = 1.27e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][70] = 1.47e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][80] = 1.66e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][90] = 1.84e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][100] = 2.03e-8 * pow(vdd_real[2] / (vdd[2]), 5); + + I_g_on_n[2][0] = 3.24e-8; // A/micron I_g_on_n[2][10] = 4.01e-8; I_g_on_n[2][20] = 4.90e-8; I_g_on_n[2][30] = 5.92e-8; @@ -965,29 +981,29 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[2][90] = 1.43e-7; I_g_on_n[2][100] = 1.54e-7; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters + if (ram_cell_tech_type == lp_dram) { + // LP-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.1; Lphy[3] = 0.078; - Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + Lelec[3] = 0.0504; // Assume Lelec is 30% lesser than Lphy for DRAM + // access and wordline transistors. 
curr_v_th_dram_access_transistor = 0.44559; width_dram_access_transistor = 0.079; - curr_I_on_dram_cell = 36e-6;//A + curr_I_on_dram_cell = 36e-6; // A curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; + curr_Wmemcellnmos_dram = 0; curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; curr_asp_ratio_cell_dram = 1.46; curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters + // LP-DRAM wordline transistor parameters curr_vpp = 1.5; t_ox[3] = 2.1e-3; v_th[3] = 0.44559; c_ox[3] = 1.41e-14; - mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.181; c_g_ideal[3] = 1.10e-15; c_fringe[3] = 0.08e-15; @@ -997,7 +1013,8 @@ void init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.65; n_to_p_eff_curr_drv_ratio[3] = 2.05; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; I_off_n[3][0] = 2.54e-11; @@ -1011,25 +1028,23 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][80] = 4.26e-10; I_off_n[3][90] = 5.27e-10; I_off_n[3][100] = 6.46e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters + } else if (ram_cell_tech_type == comm_dram) { + // COMM-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.1; Lphy[3] = 0.045; Lelec[3] = 0.0298; curr_v_th_dram_access_transistor = 1; width_dram_access_transistor = 0.045; - curr_I_on_dram_cell = 20e-6;//A + curr_I_on_dram_cell = 20e-6; // A curr_I_off_dram_cell_worst_case_length_temp = 1e-15; curr_Wmemcella_dram = width_dram_access_transistor; 
curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.045*0.045; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.045 * 0.045; curr_asp_ratio_cell_dram = 1.5; curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters + // COMM-DRAM wordline transistor parameters curr_vpp = 2.7; t_ox[3] = 4e-3; v_th[3] = 1.0; @@ -1044,7 +1059,8 @@ void init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.69; n_to_p_eff_curr_drv_ratio[3] = 1.95; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; I_off_n[3][0] = 1.31e-14; @@ -1060,37 +1076,37 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][100] = 3.29e-12; } - - //SRAM cell properties + // SRAM cell properties curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited + // CAM cell properties //TODO: data need to be revisited curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7; - curr_core_tx_density = 1.25; - curr_sckt_co_eff = 1.1387; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7; + 
curr_core_tx_density = 1.25; + curr_sckt_co_eff = 1.1387; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA placement and routing tool rule of thumb } - if (tech == 32) - { - SENSE_AMP_D = .03e-9; // s + if (tech == 32) { + SENSE_AMP_D = .03e-9; // s SENSE_AMP_P = 2.16e-15; // J - //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm - //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for - //HP and LSTP. + // For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is + // 32 nm technology i.e. FEATURESIZE = 0.032). Using the SOI process + // numbers for HP and LSTP. vdd[0] = 0.9; vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0]; - alpha_power_law[0]=1.19; + alpha_power_law[0] = 1.19; Lphy[0] = 0.013; Lelec[0] = 0.01013; t_ox[0] = 0.5e-3; @@ -1101,29 +1117,33 @@ void init_tech_params(double technology, bool is_tag) c_g_ideal[0] = 5.34e-16; c_fringe[0] = 0.04e-15; c_junc[0] = 1e-15; - I_on_n[0] = 2211.7e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]); + I_on_n[0] = 2211.7e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); I_on_p[0] = I_on_n[0] / 2; nmos_effective_resistance_multiplier = 1.49; n_to_p_eff_curr_drv_ratio[0] = 2.41; gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.706; - //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), - //whichever comes first - I_off_n[0][0] = 1.52e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][10] = 1.55e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][20] = 1.59e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][30] = 
1.68e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][40] = 1.90e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][50] = 2.69e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][60] = 5.32e-7*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][70] = 1.02e-6*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][80] = 1.62e-6*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][90] = 2.73e-6*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][100] = 6.1e-6*pow(vdd_real[0]/(vdd[0]),2); - - I_g_on_n[0][0] = 6.55e-8;//A/micron + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / + I_on_n[0]; // ohm-micron + Rpchannelon[0] = + n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; // ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.706; + // Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate + // until Ion reduces to 95% or Lgate increase by 5% (DG device can only + // increase by 5%), whichever comes first + I_off_n[0][0] = 1.52e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][10] = 1.55e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][20] = 1.59e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][30] = 1.68e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][40] = 1.90e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][50] = 2.69e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][60] = 5.32e-7 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][70] = 1.02e-6 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][80] = 1.62e-6 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][90] = 2.73e-6 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][100] = 6.1e-6 * pow(vdd_real[0] / (vdd[0]), 2); + + I_g_on_n[0][0] = 6.55e-8; // A/micron I_g_on_n[0][10] = 6.55e-8; I_g_on_n[0][20] = 6.55e-8; I_g_on_n[0][30] = 6.55e-8; @@ -1135,54 +1155,56 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[0][90] = 6.55e-8; I_g_on_n[0][100] = 6.55e-8; -// 32 DG -// I_g_on_n[0][0] = 2.71e-9;//A/micron -// I_g_on_n[0][10] = 2.71e-9; -// I_g_on_n[0][20] = 2.71e-9; -// I_g_on_n[0][30] = 2.71e-9; -// I_g_on_n[0][40] = 2.71e-9; -// 
I_g_on_n[0][50] = 2.71e-9; -// I_g_on_n[0][60] = 2.71e-9; -// I_g_on_n[0][70] = 2.71e-9; -// I_g_on_n[0][80] = 2.71e-9; -// I_g_on_n[0][90] = 2.71e-9; -// I_g_on_n[0][100] = 2.71e-9; - - //LSTP device type + // 32 DG + // I_g_on_n[0][0] = 2.71e-9;//A/micron + // I_g_on_n[0][10] = 2.71e-9; + // I_g_on_n[0][20] = 2.71e-9; + // I_g_on_n[0][30] = 2.71e-9; + // I_g_on_n[0][40] = 2.71e-9; + // I_g_on_n[0][50] = 2.71e-9; + // I_g_on_n[0][60] = 2.71e-9; + // I_g_on_n[0][70] = 2.71e-9; + // I_g_on_n[0][80] = 2.71e-9; + // I_g_on_n[0][90] = 2.71e-9; + // I_g_on_n[0][100] = 2.71e-9; + + // LSTP device type vdd[1] = 1; vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1]; - alpha_power_law[1]=1.27; + alpha_power_law[1] = 1.27; Lphy[1] = 0.020; Lelec[1] = 0.0173; t_ox[1] = 1.2e-3; v_th[1] = 0.513; c_ox[1] = 2.29e-14; - mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[1] = 8.64e-2; c_g_ideal[1] = 4.58e-16; c_fringe[1] = 0.053e-15; c_junc[1] = 1e-15; - I_on_n[1] = 683.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]); + I_on_n[1] = 683.6e-6 * pow((vdd_real[1] - v_th[1]) / (vdd[1] - v_th[1]), + alpha_power_law[1]); I_on_p[1] = I_on_n[1] / 2; nmos_effective_resistance_multiplier = 1.99; n_to_p_eff_curr_drv_ratio[1] = 2.23; gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; + Rnchannelon[1] = + nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1]; Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/1.93; - I_off_n[1][0] = 2.06e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][10] = 3.30e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][20] = 5.15e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][30] = 7.83e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][40] = 1.16e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][50] = 1.69e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][60] = 
2.40e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][70] = 3.34e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][80] = 4.54e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][90] = 5.96e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][100] = 7.44e-10*pow(vdd_real[1]/(vdd[1]),1); - - I_g_on_n[1][0] = 3.73e-11;//A/micron + long_channel_leakage_reduction[1] = 1 / 1.93; + I_off_n[1][0] = 2.06e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][10] = 3.30e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][20] = 5.15e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][30] = 7.83e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][40] = 1.16e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][50] = 1.69e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][60] = 2.40e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][70] = 3.34e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][80] = 4.54e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][90] = 5.96e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][100] = 7.44e-10 * pow(vdd_real[1] / (vdd[1]), 1); + + I_g_on_n[1][0] = 3.73e-11; // A/micron I_g_on_n[1][10] = 3.73e-11; I_g_on_n[1][20] = 3.73e-11; I_g_on_n[1][30] = 3.73e-11; @@ -1194,42 +1216,43 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[1][90] = 3.73e-11; I_g_on_n[1][100] = 3.73e-11; - - //LOP device type + // LOP device type vdd[2] = 0.6; - vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO - alpha_power_law[2]=1.26; + vdd_real[2] = g_ip->specific_lop_vdd ? 
g_ip->lop_Vdd : vdd[2]; // TODO + alpha_power_law[2] = 1.26; Lphy[2] = 0.016; Lelec[2] = 0.01232; t_ox[2] = 0.9e-3; v_th[2] = 0.24227; c_ox[2] = 2.84e-14; - mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[2] = 4.64e-2; c_g_ideal[2] = 4.54e-16; c_fringe[2] = 0.057e-15; c_junc[2] = 1e-15; - I_on_n[2] = 827.8e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]); + I_on_n[2] = 827.8e-6 * pow((vdd_real[2] - v_th[2]) / (vdd[2] - v_th[2]), + alpha_power_law[2]); I_on_p[2] = I_on_n[2] / 2; nmos_effective_resistance_multiplier = 1.73; n_to_p_eff_curr_drv_ratio[2] = 2.28; gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; + Rnchannelon[2] = + nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2]; Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.89; - I_off_n[2][0] = 5.94e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][10] = 7.23e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][20] = 8.7e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][30] = 1.04e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][40] = 1.22e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][50] = 1.43e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][60] = 1.65e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][70] = 1.90e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][80] = 2.15e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][90] = 2.39e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][100] = 2.63e-7*pow(vdd_real[2]/(vdd[2]),5); - - I_g_on_n[2][0] = 2.93e-9;//A/micron + long_channel_leakage_reduction[2] = 1 / 1.89; + I_off_n[2][0] = 5.94e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][10] = 7.23e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][20] = 8.7e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][30] = 1.04e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][40] = 1.22e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][50] = 1.43e-7 * 
pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][60] = 1.65e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][70] = 1.90e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][80] = 2.15e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][90] = 2.39e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][100] = 2.63e-7 * pow(vdd_real[2] / (vdd[2]), 5); + + I_g_on_n[2][0] = 2.93e-9; // A/micron I_g_on_n[2][10] = 2.93e-9; I_g_on_n[2][20] = 2.93e-9; I_g_on_n[2][30] = 2.93e-9; @@ -1241,12 +1264,12 @@ void init_tech_params(double technology, bool is_tag) I_g_on_n[2][90] = 2.93e-9; I_g_on_n[2][100] = 2.93e-9; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters + if (ram_cell_tech_type == lp_dram) { + // LP-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.0; Lphy[3] = 0.056; - Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + Lelec[3] = 0.0419; // Assume Lelec is 30% lesser than Lphy for DRAM + // access and wordline transistors. 
curr_v_th_dram_access_transistor = 0.44129; width_dram_access_transistor = 0.056; curr_I_on_dram_cell = 36e-6; @@ -1258,12 +1281,12 @@ void init_tech_params(double technology, bool is_tag) curr_asp_ratio_cell_dram = 1.46; curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters + // LP-DRAM wordline transistor parameters curr_vpp = 1.5; t_ox[3] = 2e-3; v_th[3] = 0.44467; c_ox[3] = 1.48e-14; - mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.174; c_g_ideal[3] = 7.45e-16; c_fringe[3] = 0.053e-15; @@ -1273,10 +1296,11 @@ void init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.65; n_to_p_eff_curr_drv_ratio[3] = 2.05; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.57e-11; + I_off_n[3][0] = 3.57e-11; I_off_n[3][10] = 5.51e-11; I_off_n[3][20] = 8.27e-11; I_off_n[3][30] = 1.21e-10; @@ -1287,13 +1311,12 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][80] = 5.87e-10; I_off_n[3][90] = 7.29e-10; I_off_n[3][100] = 8.87e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters + } else if (ram_cell_tech_type == comm_dram) { + // COMM-DRAM cell access transistor technology parameters curr_vdd_dram_cell = 1.0; Lphy[3] = 0.032; - Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + Lelec[3] = 0.0205; // Assume Lelec is 30% lesser than Lphy for DRAM + // access and wordline transistors. 
curr_v_th_dram_access_transistor = 1; width_dram_access_transistor = 0.032; curr_I_on_dram_cell = 20e-6; @@ -1301,16 +1324,16 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.032*0.032; + curr_area_cell_dram = 6 * 0.032 * 0.032; curr_asp_ratio_cell_dram = 1.5; curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters + // COMM-DRAM wordline transistor parameters curr_vpp = 2.6; t_ox[3] = 4e-3; v_th[3] = 1.0; c_ox[3] = 7.99e-15; - mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); + mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); Vdsat[3] = 0.129; c_g_ideal[3] = 2.56e-16; c_fringe[3] = 0.053e-15; @@ -1320,10 +1343,11 @@ void init_tech_params(double technology, bool is_tag) nmos_effective_resistance_multiplier = 1.69; n_to_p_eff_curr_drv_ratio[3] = 1.95; gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rnchannelon[3] = + nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.63e-14; + I_off_n[3][0] = 3.63e-14; I_off_n[3][10] = 7.18e-14; I_off_n[3][20] = 1.36e-13; I_off_n[3][30] = 2.49e-13; @@ -1336,595 +1360,708 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][100] = 7.16e-12; } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + // SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + // CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * 
g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7; + curr_sckt_co_eff = 1.1111; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA placement and routing tool rule of thumb + } + + if (tech == 22) { + SENSE_AMP_D = .03e-9; // s + SENSE_AMP_P = 2.16e-15; // J + // For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is + // 22 nm technology i.e. FEATURESIZE = 0.022). Using the DG process + // numbers for HP. 22 nm HP + vdd[0] = 0.8; + vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0]; // TODO + alpha_power_law[0] = 1.2; // 1.3//1.15; + Lphy[0] = 0.009; // Lphy is the physical gate-length. + Lelec[0] = 0.00468; // Lelec is the electrical gate-length. + t_ox[0] = 0.55e-3; // micron + v_th[0] = 0.1395; // V + c_ox[0] = 3.63e-14; // F/micron2 + mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[0] = 2.33e-2; // V/micron + c_g_ideal[0] = 3.27e-16; // F/micron + c_fringe[0] = 0.06e-15; // F/micron + c_junc[0] = 0; // F/micron2 + I_on_n[0] = 2626.4e-6 * pow((vdd_real[0] - v_th[0]) / (vdd[0] - v_th[0]), + alpha_power_law[0]); // A/micron + I_on_p[0] = + I_on_n[0] / 2; // A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = 1.45; + n_to_p_eff_curr_drv_ratio[0] = + 2; // Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; // Just using the 32nm SOI value. 
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / + I_on_n[0]; // ohm-micron + Rpchannelon[0] = + n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; // ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.274; + I_off_n[0][0] = + 1.52e-7 / 1.5 * 1.2 * + pow(vdd_real[0] / (vdd[0]), + 2); // From 22nm, leakage current are directly from ITRS report + // rather than MASTAR, since MASTAR has serious bugs there. + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * pow(vdd_real[0] / (vdd[0]), 2); + // for 22nm DG HP + I_g_on_n[0][0] = 1.81e-9; // A/micron + I_g_on_n[0][10] = 1.81e-9; + I_g_on_n[0][20] = 1.81e-9; + I_g_on_n[0][30] = 1.81e-9; + I_g_on_n[0][40] = 1.81e-9; + I_g_on_n[0][50] = 1.81e-9; + I_g_on_n[0][60] = 1.81e-9; + I_g_on_n[0][70] = 1.81e-9; + I_g_on_n[0][80] = 1.81e-9; + I_g_on_n[0][90] = 1.81e-9; + I_g_on_n[0][100] = 1.81e-9; + + // 22 nm LSTP DG + vdd[1] = 0.8; + vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1]; // TODO + alpha_power_law[1] = 1.23; + Lphy[1] = 0.014; + Lelec[1] = 0.008; // Lelec is the electrical gate-length. 
+ t_ox[1] = 1.1e-3; // micron + v_th[1] = 0.40126; // V + c_ox[1] = 2.30e-14; // F/micron2 + mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[1] = 6.64e-2; // V/micron + c_g_ideal[1] = 3.22e-16; // F/micron + c_fringe[1] = 0.08e-15; + c_junc[1] = 0; // F/micron2 + I_on_n[1] = 727.6e-6 * pow((vdd_real[1] - v_th[1]) / (vdd[1] - v_th[1]), + alpha_power_law[1]); // A/micron + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / + I_on_n[1]; // ohm-micron + Rpchannelon[1] = + n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; // ohm-micron + long_channel_leakage_reduction[1] = 1 / 1.89; + I_off_n[1][0] = 2.43e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][10] = 4.85e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][20] = 9.68e-11 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][30] = 1.94e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][40] = 3.87e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][50] = 7.73e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][60] = 3.55e-10 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][70] = 3.09e-9 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][80] = 6.19e-9 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][90] = 1.24e-8 * pow(vdd_real[1] / (vdd[1]), 1); + I_off_n[1][100] = 2.48e-8 * pow(vdd_real[1] / (vdd[1]), 1); + + I_g_on_n[1][0] = 4.51e-10; // A/micron + I_g_on_n[1][10] = 4.51e-10; + I_g_on_n[1][20] = 4.51e-10; + I_g_on_n[1][30] = 4.51e-10; + I_g_on_n[1][40] = 4.51e-10; + I_g_on_n[1][50] = 4.51e-10; + I_g_on_n[1][60] = 4.51e-10; + I_g_on_n[1][70] = 4.51e-10; + I_g_on_n[1][80] = 4.51e-10; + I_g_on_n[1][90] = 4.51e-10; + I_g_on_n[1][100] = 4.51e-10; + + // 22 nm LOP + vdd[2] = 0.6; + vdd_real[2] = g_ip->specific_lop_vdd ? 
g_ip->lop_Vdd : vdd[2]; // TODO + alpha_power_law[2] = 1.21; + Lphy[2] = 0.011; + Lelec[2] = 0.00604; // Lelec is the electrical gate-length. + t_ox[2] = 0.8e-3; // micron + v_th[2] = 0.2315; // V + c_ox[2] = 2.87e-14; // F/micron2 + mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[2] = 1.81e-2; // V/micron + c_g_ideal[2] = 3.16e-16; // F/micron + c_fringe[2] = 0.08e-15; + c_junc[2] = + 0; // F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab + I_on_n[2] = 916.1e-6 * pow((vdd_real[2] - v_th[2]) / (vdd[2] - v_th[2]), + alpha_power_law[2]); // A/micron + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.73; + n_to_p_eff_curr_drv_ratio[2] = 2; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / + I_on_n[2]; // ohm-micron + Rpchannelon[2] = + n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; // ohm-micron + long_channel_leakage_reduction[2] = 1 / 2.38; + + I_off_n[2][0] = 1.31e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][10] = 2.60e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][20] = 5.14e-8 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][30] = 1.02e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][40] = 2.02e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][50] = 3.99e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][60] = 7.91e-7 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][70] = 1.09e-6 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][80] = 2.09e-6 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][90] = 4.04e-6 * pow(vdd_real[2] / (vdd[2]), 5); + I_off_n[2][100] = 4.48e-6 * pow(vdd_real[2] / (vdd[2]), 5); + + I_g_on_n[2][0] = 2.74e-9; // A/micron + I_g_on_n[2][10] = 2.74e-9; + I_g_on_n[2][20] = 2.74e-9; + I_g_on_n[2][30] = 2.74e-9; + I_g_on_n[2][40] = 2.74e-9; + I_g_on_n[2][50] = 2.74e-9; + I_g_on_n[2][60] = 2.74e-9; + I_g_on_n[2][70] = 2.74e-9; + I_g_on_n[2][80] = 2.74e-9; + I_g_on_n[2][90] = 2.74e-9; + I_g_on_n[2][100] = 2.74e-9; + + if 
(ram_cell_tech_type == 3) { + } else if (ram_cell_tech_type == 4) { + // 22 nm commodity DRAM cell access transistor technology parameters. + // parameters + curr_vdd_dram_cell = 0.9; // 0.45;//This value has reduced greatly in + // 2007 ITRS for all technology nodes. In + // 2005 ITRS, the value was about twice the value in 2007 ITRS + Lphy[3] = 0.022; // micron + Lelec[3] = 0.0181; // micron. + curr_v_th_dram_access_transistor = 1; // V + width_dram_access_transistor = 0.022; // micron + curr_I_on_dram_cell = + 20e-6; // This is a typical value that I have always + // kept constant. In reality this could perhaps be lower + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; // A + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.022 * 0.022; // micron2. + curr_asp_ratio_cell_dram = 0.667; + curr_c_dram_cell = 30e-15; // This is a typical value that I have alwaus + // kept constant. + + // 22 nm commodity DRAM wordline transistor parameters obtained using + // MASTAR. + curr_vpp = 2.3; // vpp. V + t_ox[3] = 3.5e-3; // micron + v_th[3] = 1.0; // V + c_ox[3] = 9.06e-15; // F/micron2 + mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[3] = 0.0972; // V/micron + c_g_ideal[3] = 1.99e-16; // F/micron + c_fringe[3] = 0.053e-15; // F/micron + c_junc[3] = 1e-15; // F/micron2 + I_on_n[3] = 910.5e-6; // A/micron + I_on_p[3] = I_on_n[3] / 2; // This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = + 1.69; // Using the value from 32nm. 
+ // + n_to_p_eff_curr_drv_ratio[3] = 1.95; // Using the value from 32nm + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / + I_on_n[3]; // ohm-micron + Rpchannelon[3] = + n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; // ohm-micron + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.1e-13; // A/micron + I_off_n[3][10] = 2.11e-13; + I_off_n[3][20] = 3.88e-13; + I_off_n[3][30] = 6.9e-13; + I_off_n[3][40] = 1.19e-12; + I_off_n[3][50] = 1.98e-12; + I_off_n[3][60] = 3.22e-12; + I_off_n[3][70] = 5.09e-12; + I_off_n[3][80] = 7.85e-12; + I_off_n[3][90] = 1.18e-11; + I_off_n[3][100] = 1.72e-11; + + } else { + // some error handler + } + + // SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited + // CAM cell properties //TODO: data need to be revisited curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7; - curr_sckt_co_eff = 1.1111; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA 
placement and routing tool rule of thumb } - if(tech == 22){ - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm - //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. - //22 nm HP - vdd[0] = 0.8; - vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];//TODO - alpha_power_law[0]=1.2;//1.3//1.15; - Lphy[0] = 0.009;//Lphy is the physical gate-length. - Lelec[0] = 0.00468;//Lelec is the electrical gate-length. - t_ox[0] = 0.55e-3;//micron - v_th[0] = 0.1395;//V - c_ox[0] = 3.63e-14;//F/micron2 - mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 2.33e-2; //V/micron - c_g_ideal[0] = 3.27e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron - c_junc[0] = 0;//F/micron2 - I_on_n[0] = 2626.4e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.45; - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.274; - I_off_n[0][0] = 1.52e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there. 
- I_off_n[0][10] = 1.55e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][20] = 1.59e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][30] = 1.68e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][40] = 1.90e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][50] = 2.69e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][60] = 5.32e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][70] = 1.02e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][80] = 1.62e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][90] = 2.73e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - I_off_n[0][100] = 6.1e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2); - //for 22nm DG HP - I_g_on_n[0][0] = 1.81e-9;//A/micron - I_g_on_n[0][10] = 1.81e-9; - I_g_on_n[0][20] = 1.81e-9; - I_g_on_n[0][30] = 1.81e-9; - I_g_on_n[0][40] = 1.81e-9; - I_g_on_n[0][50] = 1.81e-9; - I_g_on_n[0][60] = 1.81e-9; - I_g_on_n[0][70] = 1.81e-9; - I_g_on_n[0][80] = 1.81e-9; - I_g_on_n[0][90] = 1.81e-9; - I_g_on_n[0][100] = 1.81e-9; - - //22 nm LSTP DG - vdd[1] = 0.8; - vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];//TODO - alpha_power_law[1]=1.23; - Lphy[1] = 0.014; - Lelec[1] = 0.008;//Lelec is the electrical gate-length. 
- t_ox[1] = 1.1e-3;//micron - v_th[1] = 0.40126;//V - c_ox[1] = 2.30e-14;//F/micron2 - mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[1] = 6.64e-2; //V/micron - c_g_ideal[1] = 3.22e-16;//F/micron - c_fringe[1] = 0.08e-15; - c_junc[1] = 0;//F/micron2 - I_on_n[1] = 727.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);//A/micron - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];//ohm-micron - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron - long_channel_leakage_reduction[1] = 1/1.89; - I_off_n[1][0] = 2.43e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][10] = 4.85e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][20] = 9.68e-11*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][30] = 1.94e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][40] = 3.87e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][50] = 7.73e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][60] = 3.55e-10*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][70] = 3.09e-9*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][80] = 6.19e-9*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][90] = 1.24e-8*pow(vdd_real[1]/(vdd[1]),1); - I_off_n[1][100]= 2.48e-8*pow(vdd_real[1]/(vdd[1]),1); - - I_g_on_n[1][0] = 4.51e-10;//A/micron - I_g_on_n[1][10] = 4.51e-10; - I_g_on_n[1][20] = 4.51e-10; - I_g_on_n[1][30] = 4.51e-10; - I_g_on_n[1][40] = 4.51e-10; - I_g_on_n[1][50] = 4.51e-10; - I_g_on_n[1][60] = 4.51e-10; - I_g_on_n[1][70] = 4.51e-10; - I_g_on_n[1][80] = 4.51e-10; - I_g_on_n[1][90] = 4.51e-10; - I_g_on_n[1][100] = 4.51e-10; - - //22 nm LOP - vdd[2] = 0.6; - vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO - alpha_power_law[2]=1.21; - Lphy[2] = 0.011; - Lelec[2] = 0.00604;//Lelec is the electrical gate-length. 
- t_ox[2] = 0.8e-3;//micron - v_th[2] = 0.2315;//V - c_ox[2] = 2.87e-14;//F/micron2 - mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[2] = 1.81e-2; //V/micron - c_g_ideal[2] = 3.16e-16;//F/micron - c_fringe[2] = 0.08e-15; - c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab - I_on_n[2] = 916.1e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);//A/micron - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];//ohm-micron - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron - long_channel_leakage_reduction[2] = 1/2.38; - - I_off_n[2][0] = 1.31e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][10] = 2.60e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][20] = 5.14e-8*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][30] = 1.02e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][40] = 2.02e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][50] = 3.99e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][60] = 7.91e-7*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][70] = 1.09e-6*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][80] = 2.09e-6*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][90] = 4.04e-6*pow(vdd_real[2]/(vdd[2]),5); - I_off_n[2][100]= 4.48e-6*pow(vdd_real[2]/(vdd[2]),5); - - I_g_on_n[2][0] = 2.74e-9;//A/micron - I_g_on_n[2][10] = 2.74e-9; - I_g_on_n[2][20] = 2.74e-9; - I_g_on_n[2][30] = 2.74e-9; - I_g_on_n[2][40] = 2.74e-9; - I_g_on_n[2][50] = 2.74e-9; - I_g_on_n[2][60] = 2.74e-9; - I_g_on_n[2][70] = 2.74e-9; - I_g_on_n[2][80] = 2.74e-9; - I_g_on_n[2][90] = 2.74e-9; - I_g_on_n[2][100] = 2.74e-9; - - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. 
- //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. 
- // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 16){ - //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm - //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. - //16 nm HP - vdd[0] = 0.7; - Lphy[0] = 0.006;//Lphy is the physical gate-length. - Lelec[0] = 0.00315;//Lelec is the electrical gate-length. 
- t_ox[0] = 0.5e-3;//micron - v_th[0] = 0.1489;//V - c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR - mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet - c_g_ideal[0] = 2.30e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 - c_junc[0] = 0;//F/micron2 MASTAR result dynamic - I_on_n[0] = 2768.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/2.655; - I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07; - I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07; - I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07; - I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07; - I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07; - I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07; - I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07; - I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07; - I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07; - I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07; - I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07; - //for 16nm DG HP - I_g_on_n[0][0] = 1.07e-9;//A/micron - I_g_on_n[0][10] = 1.07e-9; - I_g_on_n[0][20] = 1.07e-9; - I_g_on_n[0][30] = 1.07e-9; - I_g_on_n[0][40] = 1.07e-9; - I_g_on_n[0][50] = 1.07e-9; - I_g_on_n[0][60] = 1.07e-9; - I_g_on_n[0][70] = 1.07e-9; - I_g_on_n[0][80] = 1.07e-9; - I_g_on_n[0][90] = 1.07e-9; - I_g_on_n[0][100] = 1.07e-9; - -// //16 nm LSTP DG -// vdd[1] = 0.8; -// Lphy[1] = 0.014; -// Lelec[1] = 0.008;//Lelec is the electrical 
gate-length. -// t_ox[1] = 1.1e-3;//micron -// v_th[1] = 0.40126;//V -// c_ox[1] = 2.30e-14;//F/micron2 -// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs -// Vdsat[1] = 6.64e-2; //V/micron -// c_g_ideal[1] = 3.22e-16;//F/micron -// c_fringe[1] = 0.008e-15; -// c_junc[1] = 0;//F/micron2 -// I_on_n[1] = 727.6e-6;//A/micron -// I_on_p[1] = I_on_n[1] / 2; -// nmos_effective_resistance_multiplier = 1.99; -// n_to_p_eff_curr_drv_ratio[1] = 2; -// gmp_to_gmn_multiplier[1] = 0.99; -// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron -// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron -// I_off_n[1][0] = 2.43e-11; -// I_off_n[1][10] = 4.85e-11; -// I_off_n[1][20] = 9.68e-11; -// I_off_n[1][30] = 1.94e-10; -// I_off_n[1][40] = 3.87e-10; -// I_off_n[1][50] = 7.73e-10; -// I_off_n[1][60] = 3.55e-10; -// I_off_n[1][70] = 3.09e-9; -// I_off_n[1][80] = 6.19e-9; -// I_off_n[1][90] = 1.24e-8; -// I_off_n[1][100]= 2.48e-8; -// -// // for 22nm LSTP HP -// I_g_on_n[1][0] = 4.51e-10;//A/micron -// I_g_on_n[1][10] = 4.51e-10; -// I_g_on_n[1][20] = 4.51e-10; -// I_g_on_n[1][30] = 4.51e-10; -// I_g_on_n[1][40] = 4.51e-10; -// I_g_on_n[1][50] = 4.51e-10; -// I_g_on_n[1][60] = 4.51e-10; -// I_g_on_n[1][70] = 4.51e-10; -// I_g_on_n[1][80] = 4.51e-10; -// I_g_on_n[1][90] = 4.51e-10; -// I_g_on_n[1][100] = 4.51e-10; - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. - //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. 
In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. - // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam 
= 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + if (tech == 16) { + // For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is + // 16 nm technology i.e. FEATURESIZE = 0.016). Using the DG process + // numbers for HP. 16 nm HP + vdd[0] = 0.7; + Lphy[0] = 0.006; // Lphy is the physical gate-length. + Lelec[0] = 0.00315; // Lelec is the electrical gate-length. + t_ox[0] = 0.5e-3; // micron + v_th[0] = 0.1489; // V + c_ox[0] = 3.83e-14; // F/micron2 Cox_elec in MASTAR + mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[0] = 1.42e-2; // V/micron calculated in spreadsheet + c_g_ideal[0] = 2.30e-16; // F/micron + c_fringe[0] = 0.06e-15; // F/micron MASTAR inputdynamic/3 + c_junc[0] = 0; // F/micron2 MASTAR result dynamic + I_on_n[0] = 2768.4e-6; // A/micron + I_on_p[0] = + I_on_n[0] / 2; // A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = + 1.48; // nmos_effective_resistance_multiplier is the ratio of Ieff to + // Idsat where Ieff is the effective NMOS current and Idsat is + // the saturation current. + n_to_p_eff_curr_drv_ratio[0] = + 2; // Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; // Just using the 32nm SOI value. 
+ Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / + I_on_n[0]; // ohm-micron + Rpchannelon[0] = + n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; // ohm-micron + long_channel_leakage_reduction[0] = 1 / 2.655; + I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * 1.07; + // for 16nm DG HP + I_g_on_n[0][0] = 1.07e-9; // A/micron + I_g_on_n[0][10] = 1.07e-9; + I_g_on_n[0][20] = 1.07e-9; + I_g_on_n[0][30] = 1.07e-9; + I_g_on_n[0][40] = 1.07e-9; + I_g_on_n[0][50] = 1.07e-9; + I_g_on_n[0][60] = 1.07e-9; + I_g_on_n[0][70] = 1.07e-9; + I_g_on_n[0][80] = 1.07e-9; + I_g_on_n[0][90] = 1.07e-9; + I_g_on_n[0][100] = 1.07e-9; + + // //16 nm LSTP DG + // vdd[1] = 0.8; + // Lphy[1] = 0.014; + // Lelec[1] = 0.008;//Lelec is the electrical gate-length. 
+ // t_ox[1] = 1.1e-3;//micron + // v_th[1] = 0.40126;//V + // c_ox[1] = 2.30e-14;//F/micron2 + // mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 + // / Vs Vdsat[1] = 6.64e-2; //V/micron c_g_ideal[1] + // = 3.22e-16;//F/micron c_fringe[1] = 0.008e-15; c_junc[1] + // = + // 0;//F/micron2 I_on_n[1] = 727.6e-6;//A/micron I_on_p[1] = + // I_on_n[1] / 2; nmos_effective_resistance_multiplier = 1.99; + // n_to_p_eff_curr_drv_ratio[1] = 2; + // gmp_to_gmn_multiplier[1] = 0.99; + // Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / + // I_on_n[1];//ohm-micron Rpchannelon[1] = + // n_to_p_eff_curr_drv_ratio[1] + // * Rnchannelon[1];//ohm-micron I_off_n[1][0] = 2.43e-11; + // I_off_n[1][10] = 4.85e-11; + // I_off_n[1][20] = 9.68e-11; + // I_off_n[1][30] = 1.94e-10; + // I_off_n[1][40] = 3.87e-10; + // I_off_n[1][50] = 7.73e-10; + // I_off_n[1][60] = 3.55e-10; + // I_off_n[1][70] = 3.09e-9; + // I_off_n[1][80] = 6.19e-9; + // I_off_n[1][90] = 1.24e-8; + // I_off_n[1][100]= 2.48e-8; + // + // // for 22nm LSTP HP + // I_g_on_n[1][0] = 4.51e-10;//A/micron + // I_g_on_n[1][10] = 4.51e-10; + // I_g_on_n[1][20] = 4.51e-10; + // I_g_on_n[1][30] = 4.51e-10; + // I_g_on_n[1][40] = 4.51e-10; + // I_g_on_n[1][50] = 4.51e-10; + // I_g_on_n[1][60] = 4.51e-10; + // I_g_on_n[1][70] = 4.51e-10; + // I_g_on_n[1][80] = 4.51e-10; + // I_g_on_n[1][90] = 4.51e-10; + // I_g_on_n[1][100] = 4.51e-10; + + if (ram_cell_tech_type == 3) { + } else if (ram_cell_tech_type == 4) { + // 22 nm commodity DRAM cell access transistor technology parameters. + // parameters + curr_vdd_dram_cell = 0.9; // 0.45;//This value has reduced greatly in + // 2007 ITRS for all technology nodes. In + // 2005 ITRS, the value was about twice the value in 2007 ITRS + Lphy[3] = 0.022; // micron + Lelec[3] = 0.0181; // micron. 
+ curr_v_th_dram_access_transistor = 1; // V + width_dram_access_transistor = 0.022; // micron + curr_I_on_dram_cell = + 20e-6; // This is a typical value that I have always + // kept constant. In reality this could perhaps be lower + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; // A + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.022 * 0.022; // micron2. + curr_asp_ratio_cell_dram = 0.667; + curr_c_dram_cell = 30e-15; // This is a typical value that I have alwaus + // kept constant. + + // 22 nm commodity DRAM wordline transistor parameters obtained using + // MASTAR. + curr_vpp = 2.3; // vpp. V + t_ox[3] = 3.5e-3; // micron + v_th[3] = 1.0; // V + c_ox[3] = 9.06e-15; // F/micron2 + mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6); // micron2 / Vs + Vdsat[3] = 0.0972; // V/micron + c_g_ideal[3] = 1.99e-16; // F/micron + c_fringe[3] = 0.053e-15; // F/micron + c_junc[3] = 1e-15; // F/micron2 + I_on_n[3] = 910.5e-6; // A/micron + I_on_p[3] = I_on_n[3] / 2; // This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = + 1.69; // Using the value from 32nm. 
+ // + n_to_p_eff_curr_drv_ratio[3] = 1.95; // Using the value from 32nm + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / + I_on_n[3]; // ohm-micron + Rpchannelon[3] = + n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; // ohm-micron + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.1e-13; // A/micron + I_off_n[3][10] = 2.11e-13; + I_off_n[3][20] = 3.88e-13; + I_off_n[3][30] = 6.9e-13; + I_off_n[3][40] = 1.19e-12; + I_off_n[3][50] = 1.98e-12; + I_off_n[3][60] = 3.22e-12; + I_off_n[3][70] = 5.09e-12; + I_off_n[3][80] = 7.85e-12; + I_off_n[3][90] = 1.18e-11; + I_off_n[3][100] = 1.72e-11; + + } else { + // some error handler + } + + // SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + // CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + // Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = + 1.2; // die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = + 1.1; // EDA placement and routing tool rule of thumb + } /* - * TODO:WL_Vcc does not need to retain data as long as the wordline enable signal is not active (of course enable signal will not be active since it is idle) - * So, the WL_Vcc only need to balance the leakage reduction and the required waking up restore time (as mentioned in the 4.0Ghz 291 Mb SRAM Intel Paper) - */ - g_tp.peri_global.Vdd += curr_alpha * 
vdd_real[peri_global_tech_type];//real vdd, user defined or itrs - g_tp.peri_global.Vdd_default += curr_alpha * vdd[peri_global_tech_type];//itrs vdd this does not have to do within line interpolation loop, can be assigned directly - g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; - g_tp.peri_global.Vcc_min_default += g_tp.peri_global.Vdd_default * 0.45;// Use minimal voltage to keep the device conducted.//g_tp.peri_global.Vth; - g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; - g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; + * TODO:WL_Vcc does not need to retain data as long as the wordline enable + * signal is not active (of course enable signal will not be active since it + * is idle) So, the WL_Vcc only need to balance the leakage reduction and + * the required waking up restore time (as mentioned in the 4.0Ghz 291 Mb + * SRAM Intel Paper) + */ + g_tp.peri_global.Vdd += + curr_alpha * + vdd_real[peri_global_tech_type]; // real vdd, user defined or itrs + g_tp.peri_global.Vdd_default += + curr_alpha * + vdd[peri_global_tech_type]; // itrs vdd this does not have to do within + // line interpolation loop, can be assigned + // directly + g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; + g_tp.peri_global.Vcc_min_default += + g_tp.peri_global.Vdd_default * + 0.45; // Use minimal voltage to keep the device + // conducted.//g_tp.peri_global.Vth; + g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; + g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; - g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; - g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; - g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; - g_tp.peri_global.l_elec += curr_alpha * 
Lelec[peri_global_tech_type]; - g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; - g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; - g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; - g_tp.peri_global.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; - g_tp.peri_global.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; - g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];//*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//Consider the voltage change may affect the current density as well. TODO: polynomial curve-fitting based on MASTAR may not be accurate enough - g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];//*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//To mimic the Vdd effect on Ioff (for the same device, dvs should not change default Ioff---only changes if device is different?? 
but MASTAR shows different results) - g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; - g_tp.peri_global.Mobility_n += curr_alpha *mobility_eff[peri_global_tech_type]; - - //Sleep tx uses LSTP devices - g_tp.sleep_tx.Vdd += curr_alpha * vdd_real[1]; - g_tp.sleep_tx.Vdd_default += curr_alpha * vdd[1]; - g_tp.sleep_tx.Vth += curr_alpha * v_th[1]; - g_tp.sleep_tx.Vcc_min_default += g_tp.sleep_tx.Vdd; - g_tp.sleep_tx.Vcc_min = g_tp.sleep_tx.Vcc_min_default;//user cannot change this, has to be decided by technology - g_tp.sleep_tx.t_ox += curr_alpha * t_ox[1]; - g_tp.sleep_tx.C_ox += curr_alpha * c_ox[1]; + g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; + g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; + g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; + g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; + g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; + g_tp.peri_global.R_nch_on += + curr_alpha * Rnchannelon[peri_global_tech_type]; + g_tp.peri_global.R_pch_on += + curr_alpha * Rpchannelon[peri_global_tech_type]; + g_tp.peri_global.n_to_p_eff_curr_drv_ratio += + curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; + g_tp.peri_global.long_channel_leakage_reduction += + curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; + g_tp.peri_global.I_off_n += + curr_alpha * + I_off_n + [peri_global_tech_type] + [g_ip->temp - + 300]; //*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//Consider + // the voltage change may affect the current density as + // well. 
TODO: polynomial curve-fitting based on MASTAR may + // not be accurate enough + g_tp.peri_global.I_off_p += + curr_alpha * + I_off_n + [peri_global_tech_type] + [g_ip->temp - + 300]; //*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//To + // mimic the Vdd effect on Ioff (for the same device, dvs + // should not change default Ioff---only changes if device + // is different?? but MASTAR shows different results) + g_tp.peri_global.I_g_on_n += + curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_g_on_p += + curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + gmp_to_gmn_multiplier_periph_global += + curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; + g_tp.peri_global.Mobility_n += + curr_alpha * mobility_eff[peri_global_tech_type]; + + // Sleep tx uses LSTP devices + g_tp.sleep_tx.Vdd += curr_alpha * vdd_real[1]; + g_tp.sleep_tx.Vdd_default += curr_alpha * vdd[1]; + g_tp.sleep_tx.Vth += curr_alpha * v_th[1]; + g_tp.sleep_tx.Vcc_min_default += g_tp.sleep_tx.Vdd; + g_tp.sleep_tx.Vcc_min = + g_tp.sleep_tx.Vcc_min_default; // user cannot change this, has to be + // decided by technology + g_tp.sleep_tx.t_ox += curr_alpha * t_ox[1]; + g_tp.sleep_tx.C_ox += curr_alpha * c_ox[1]; g_tp.sleep_tx.C_g_ideal += curr_alpha * c_g_ideal[1]; - g_tp.sleep_tx.C_fringe += curr_alpha * c_fringe[1]; - g_tp.sleep_tx.C_junc += curr_alpha * c_junc[1]; - g_tp.sleep_tx.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sleep_tx.l_phy += curr_alpha * Lphy[1]; - g_tp.sleep_tx.l_elec += curr_alpha * Lelec[1]; - g_tp.sleep_tx.I_on_n += curr_alpha * I_on_n[1]; - g_tp.sleep_tx.R_nch_on += curr_alpha * Rnchannelon[1]; - g_tp.sleep_tx.R_pch_on += curr_alpha * Rpchannelon[1]; - g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[1]; - g_tp.sleep_tx.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[1]; - g_tp.sleep_tx.I_off_n += curr_alpha * I_off_n[1][g_ip->temp - 
300];//**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4); - g_tp.sleep_tx.I_off_p += curr_alpha * I_off_n[1][g_ip->temp - 300];//**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4); - g_tp.sleep_tx.I_g_on_n += curr_alpha * I_g_on_n[1][g_ip->temp - 300]; - g_tp.sleep_tx.I_g_on_p += curr_alpha * I_g_on_n[1][g_ip->temp - 300]; - g_tp.sleep_tx.Mobility_n += curr_alpha *mobility_eff[1]; - // gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[1]; - - g_tp.sram_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type]; - g_tp.sram_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.sram_cell.Vcc_min_default += g_tp.sram_cell.Vdd_default * 0.6; - g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.sleep_tx.C_fringe += curr_alpha * c_fringe[1]; + g_tp.sleep_tx.C_junc += curr_alpha * c_junc[1]; + g_tp.sleep_tx.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.sleep_tx.l_phy += curr_alpha * Lphy[1]; + g_tp.sleep_tx.l_elec += curr_alpha * Lelec[1]; + g_tp.sleep_tx.I_on_n += curr_alpha * I_on_n[1]; + g_tp.sleep_tx.R_nch_on += curr_alpha * Rnchannelon[1]; + g_tp.sleep_tx.R_pch_on += curr_alpha * Rpchannelon[1]; + g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio += + curr_alpha * n_to_p_eff_curr_drv_ratio[1]; + g_tp.sleep_tx.long_channel_leakage_reduction += + curr_alpha * long_channel_leakage_reduction[1]; + g_tp.sleep_tx.I_off_n += + curr_alpha * + I_off_n[1][g_ip->temp - + 300]; //**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4); + g_tp.sleep_tx.I_off_p += + curr_alpha * + I_off_n[1][g_ip->temp - + 300]; //**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4); + g_tp.sleep_tx.I_g_on_n += curr_alpha * I_g_on_n[1][g_ip->temp - 300]; + g_tp.sleep_tx.I_g_on_p += curr_alpha * I_g_on_n[1][g_ip->temp - 300]; + g_tp.sleep_tx.Mobility_n += curr_alpha 
* mobility_eff[1]; + // gmp_to_gmn_multiplier_periph_global += curr_alpha * + // gmp_to_gmn_multiplier[1]; + + g_tp.sram_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type]; + g_tp.sram_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type]; + g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.sram_cell.Vcc_min_default += g_tp.sram_cell.Vdd_default * 0.6; + g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4); - g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4); - g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; - g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; - g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_acc.l_elec += curr_alpha * 
Lelec[dram_cell_tech_flavor]; + g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += + curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.sram_cell.long_channel_leakage_reduction += + curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.sram_cell.I_off_n += + curr_alpha * + I_off_n[ram_cell_tech_type] + [g_ip->temp - + 300]; //**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4); + g_tp.sram_cell.I_off_p += + curr_alpha * + I_off_n[ram_cell_tech_type] + [g_ip->temp - + 300]; //**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4); + g_tp.sram_cell.I_g_on_n += + curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_g_on_p += + curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; + g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; + g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; - g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; - g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; - 
g_tp.vpp += curr_alpha * curr_vpp; - g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; - g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; - g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - - g_tp.cam_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type]; - g_tp.cam_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; + g_tp.dram_cell_I_off_worst_case_len_temp += + curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; + g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; + g_tp.vpp += curr_alpha * curr_vpp; + 
g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; + g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; + g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += + curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; + g_tp.dram_wl.long_channel_leakage_reduction += + curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; + g_tp.dram_wl.I_off_n += + curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + g_tp.dram_wl.I_off_p += + curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + + g_tp.cam_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type]; + g_tp.cam_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type]; + g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * 
n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//*pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4); - g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4); - g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; + g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += + curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.cam_cell.long_channel_leakage_reduction += + curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.cam_cell.I_off_n += + curr_alpha * + I_off_n[ram_cell_tech_type] + [g_ip->temp - + 300]; //*pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4); + g_tp.cam_cell.I_off_p += + curr_alpha * + I_off_n[ram_cell_tech_type] + [g_ip->temp - + 300]; //**pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4); + g_tp.cam_cell.I_g_on_n += + curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_g_on_p += + curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; - 
area_cell_dram += curr_alpha * curr_area_cell_dram; - asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; + area_cell_dram += curr_alpha * curr_area_cell_dram; + asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; - g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; + g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; area_cell_sram += curr_alpha * curr_area_cell_sram; asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; - g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng + g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam; // sheng g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; area_cell_cam += curr_alpha * curr_area_cell_cam; asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; - //Sense amplifier latch Gm calculation - mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; + // Sense amplifier latch Gm calculation + mobility_eff_periph_global += + curr_alpha * mobility_eff[peri_global_tech_type]; Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - //Empirical undifferetiated core/FU coefficient - g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; + // Empirical undifferetiated core/FU coefficient + g_tp.scaling_factor.logic_scaling_co_eff += + curr_alpha * curr_logic_scaling_co_eff; g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; - g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; + g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; - g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; + g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; } - - //Currently we are not modeling the resistance/capacitance of 
poly anywhere. - //following data are continuous function (or data have been processed) does not need linear interpolation - g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process - g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process - g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + // Currently we are not modeling the resistance/capacitance of poly anywhere. 
+ // following data are continuous function (or data have been processed) does + // not need linear interpolation + g_tp.w_comp_inv_p1 = + 12.5 * g_ip->F_sz_um; // this was 10 micron for the 0.8 micron process + g_tp.w_comp_inv_n1 = + 7.5 * g_ip->F_sz_um; // this was 6 micron for the 0.8 micron process + g_tp.w_comp_inv_p2 = + 25 * g_ip->F_sz_um; // this was 20 micron for the 0.8 micron process + g_tp.w_comp_inv_n2 = + 15 * g_ip->F_sz_um; // this was 12 micron for the 0.8 micron process + g_tp.w_comp_inv_p3 = + 50 * g_ip->F_sz_um; // this was 40 micron for the 0.8 micron process + g_tp.w_comp_inv_n3 = + 30 * g_ip->F_sz_um; // this was 24 micron for the 0.8 micron process + g_tp.w_eval_inv_p = + 100 * g_ip->F_sz_um; // this was 80 micron for the 0.8 micron process + g_tp.w_eval_inv_n = + 50 * g_ip->F_sz_um; // this was 40 micron for the 0.8 micron process + g_tp.w_comp_n = + 12.5 * g_ip->F_sz_um; // this was 10 micron for the 0.8 micron process + g_tp.w_comp_p = + 37.5 * g_ip->F_sz_um; // this was 30 micron for the 0.8 micron process g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; @@ -1937,410 +2074,436 @@ void init_tech_params(double technology, bool is_tag) g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; - g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process - g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process - g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process - g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process - g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; + g_tp.w_iso = 12.5 * g_ip->F_sz_um; // was 10 micron for the 0.8 micron process + g_tp.w_sense_n = 3.75 * g_ip->F_sz_um; // sense amplifier N-trans; was 3 + // micron for the 0.8 micron process + g_tp.w_sense_p = 7.5 
* g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron + // for the 0.8 micron process + g_tp.w_sense_en = + 5 * g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was + // 4 micron for the 0.8 micron process + g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_; - if (ram_cell_tech_type == comm_dram) - { + if (ram_cell_tech_type == comm_dram) { g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; - g_tp.h_dec = 8; // in the unit of memory cell height - } - else - { + g_tp.h_dec = 8; // in the unit of memory cell height + } else { g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; - g_tp.h_dec = 4; // in the unit of memory cell height + g_tp.h_dec = 4; // in the unit of memory cell height } g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; - g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; - g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; + g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; + g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; - //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; + // g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; - double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; - double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; - g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch * pow((g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/(g_tp.peri_global.Vdd_default-g_tp.peri_global.Vth),1.3)/(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default); + double gmn_sense_amp_latch = + (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * + (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; + double 
gmp_sense_amp_latch = + gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; + g_tp.gm_sense_amp_latch = + gmn_sense_amp_latch + + gmp_sense_amp_latch * + pow((g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / + (g_tp.peri_global.Vdd_default - g_tp.peri_global.Vth), + 1.3) / + (g_tp.peri_global.Vdd / g_tp.peri_global.Vdd_default); g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; - g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng + g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam)); // Sheng g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; - g_tp.sram.Vbitpre = g_tp.sram_cell.Vdd;//vdd[ram_cell_tech_type]; - g_tp.cam.Vbitpre = g_tp.cam_cell.Vdd;//vdd[ram_cell_tech_type];//Sheng + g_tp.sram.Vbitpre = g_tp.sram_cell.Vdd; // vdd[ram_cell_tech_type]; + g_tp.cam.Vbitpre = g_tp.cam_cell.Vdd; // vdd[ram_cell_tech_type];//Sheng pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - //DVS and power-gating voltage finalization - if ((g_tp.sram_cell.Vcc_min_default > g_tp.sram_cell.Vdd - || g_tp.peri_global.Vdd < g_tp.peri_global.Vdd_default*0.75 - || g_tp.sram_cell.Vdd < g_tp.sram_cell.Vdd_default*0.75) && (!g_ip->is_main_mem)) - { - cerr << "User defined Vdd is too low.\n\n"<< endl; - exit(0); - } - - if (g_ip->specific_vcc_min) - { - g_tp.sram_cell.Vcc_min = g_ip->user_defined_vcc_min; - g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min; - g_tp.sram.Vbitfloating = g_tp.sram.Vbitpre*0.7*(g_tp.sram_cell.Vcc_min/g_tp.peri_global.Vcc_min_default); -// if (g_ip->user_defined_vcc_min < g_tp.peri_global.Vcc_min_default) -// { -// g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min; -// } -// else { -// 
-// } - } - else - { - g_tp.sram_cell.Vcc_min = g_tp.sram_cell.Vcc_min_default; - g_tp.peri_global.Vcc_min = g_tp.peri_global.Vcc_min_default; - g_tp.sram.Vbitfloating = g_tp.sram.Vbitpre*0.7; + // DVS and power-gating voltage finalization + if ((g_tp.sram_cell.Vcc_min_default > g_tp.sram_cell.Vdd || + g_tp.peri_global.Vdd < g_tp.peri_global.Vdd_default * 0.75 || + g_tp.sram_cell.Vdd < g_tp.sram_cell.Vdd_default * 0.75) && + (!g_ip->is_main_mem)) { + cerr << "User defined Vdd is too low.\n\n" << endl; + exit(0); } - if (g_tp.sram_cell.Vcc_min < g_tp.sram_cell.Vcc_min_default )//if want to compute multiple power-gating vdd settings in one run, should have multiple results copies (each copy containing such flag) in update_pg () - { - g_ip->user_defined_vcc_underflow = true; + if (g_ip->specific_vcc_min) { + g_tp.sram_cell.Vcc_min = g_ip->user_defined_vcc_min; + g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min; + g_tp.sram.Vbitfloating = + g_tp.sram.Vbitpre * 0.7 * + (g_tp.sram_cell.Vcc_min / g_tp.peri_global.Vcc_min_default); + // if (g_ip->user_defined_vcc_min < g_tp.peri_global.Vcc_min_default) + // { + // g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min; + // } + // else { + // + // } + } else { + g_tp.sram_cell.Vcc_min = g_tp.sram_cell.Vcc_min_default; + g_tp.peri_global.Vcc_min = g_tp.peri_global.Vcc_min_default; + g_tp.sram.Vbitfloating = g_tp.sram.Vbitpre * 0.7; } - else + + if (g_tp.sram_cell.Vcc_min < + g_tp.sram_cell + .Vcc_min_default) // if want to compute multiple power-gating vdd + // settings in one run, should have multiple results + // copies (each copy containing such flag) in + // update_pg () { - g_ip->user_defined_vcc_underflow = false; + g_ip->user_defined_vcc_underflow = true; + } else { + g_ip->user_defined_vcc_underflow = false; } - if ((g_tp.sram_cell.Vcc_min > g_tp.sram_cell.Vdd - || g_tp.peri_global.Vcc_min > g_tp.peri_global.Vdd)&& (!g_ip->is_main_mem)) - { - cerr << "User defined power-saving supply voltage cannot be lower than 
Vdd (DVS0).\n\n"<< endl; - exit(0); - } - double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; - - for (iter=0; iter<=1; ++iter) - { + if ((g_tp.sram_cell.Vcc_min > g_tp.sram_cell.Vdd || + g_tp.peri_global.Vcc_min > g_tp.peri_global.Vdd) && + (!g_ip->is_main_mem)) { + cerr << "User defined power-saving supply voltage cannot be lower than Vdd " + "(DVS0).\n\n" + << endl; + exit(0); + } + double wire_pitch[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES] + [NUMBER_WIRE_TYPES], + wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES] + [NUMBER_WIRE_TYPES], + horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES] + [NUMBER_WIRE_TYPES], + vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES] + [NUMBER_WIRE_TYPES], + aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; + + for (iter = 0; iter <= 1; ++iter) { // linear interpolation - if (iter == 0) - { + if (iter == 0) { tech = tech_lo; - if (tech_lo == tech_hi) - { + if (tech_lo == tech_hi) { curr_alpha = 1; + } else { + curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi); } - else - { - curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi); - } - } - else - { + } else { tech = tech_hi; - if (tech_lo == 
tech_hi) - { + if (tech_lo == tech_hi) { break; - } - else - { - curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi); + } else { + curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi); } } - if (tech == 180) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.0; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.017;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.75;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + if (tech == 180) { + // Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; // micron + aspect_ratio[0][0] = 2.0; + wire_width = wire_pitch[0][0] / 2; // micron + wire_thickness = aspect_ratio[0][0] * wire_width; // micron + wire_spacing = wire_pitch[0][0] - wire_width; // micron + barrier_thickness = 0.017; // micron + dishing_thickness = 0; // micron + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance( + CU_RESISTIVITY, wire_width, wire_thickness, barrier_thickness, + dishing_thickness, alpha_scatter); // ohm/micron + ild_thickness[0][0] = 0.75; // micron + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.709; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; // F/micron + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], 
vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.75;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.2; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 1.5; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0]= 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.017; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.75; - 
miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.75; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.98; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.18; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); - wire_r_per_micron[1][3] = 12 / 0.18; - } - else if 
(tech == 90) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron + fringe_cap); // F/micron. + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.4; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.75; // micron + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.709; + vert_dielectric_constant[0][1] = 3.9; + fringe_cap = 0.115e-15; // F/micron + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.2; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 1.5; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.709; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); + + // Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.017; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + 
barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.75; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 3.038; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.75; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.98; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.18; + 
wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); + wire_r_per_micron[1][3] = 12 / 0.18; + } else if (tech == 90) { + // Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; // micron aspect_ratio[0][0] = 2.4; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.01;//micron - dishing_thickness = 0;//micron + wire_width = wire_pitch[0][0] / 2; // micron + wire_thickness = aspect_ratio[0][0] * wire_width; // micron + wire_spacing = wire_pitch[0][0] - wire_width; // micron + barrier_thickness = 0.01; // micron + dishing_thickness = 0; // micron alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.48;//micron + wire_r_per_micron[0][0] = wire_resistance( + CU_RESISTIVITY, wire_width, wire_thickness, barrier_thickness, + dishing_thickness, alpha_scatter); // ohm/micron + ild_thickness[0][0] = 0.48; // micron miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 2.709; vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + fringe_cap = 0.115e-15; // F/micron + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap);//F/micron. + fringe_cap); // F/micron. 
wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 2.4; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.48;//micron + wire_r_per_micron[0][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.48; // micron miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 2.709; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 2.7; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][2] = 0.96; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 2.709; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, 
wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); - //Conservative projections + // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; + aspect_ratio[1][0] = 2.0; wire_width = wire_pitch[1][0] / 2; wire_thickness = aspect_ratio[1][0] * wire_width; wire_spacing = wire_pitch[1][0] - wire_width; barrier_thickness = 0.008; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.48; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.48; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 3.038; + vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.48; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - 
wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.48; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; + aspect_ratio[1][2] = 2.2; wire_width = wire_pitch[1][2] / 2; wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.1; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.1; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], 
horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.09; wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); wire_r_per_micron[1][3] = 12 / 0.09; - } - else if (tech == 65) - { - //Aggressive projections + } else if (tech == 65) { + // Aggressive projections wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 2.7; + aspect_ratio[0][0] = 2.7; wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; + wire_thickness = aspect_ratio[0][0] * wire_width; wire_spacing = wire_pitch[0][0] - wire_width; barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.405; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.303; - vert_dielectric_constant[0][0] = 3.9; + wire_r_per_micron[0][0] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.405; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.303; + vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.7; - wire_thickness = aspect_ratio[0][1] * wire_width; + aspect_ratio[0][1] = 2.7; + wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing 
= wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.405; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.303; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); + wire_r_per_micron[0][1] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.405; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.303; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 2.8; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][2] = 0.81; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.303; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.303; + 
vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); + + // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; aspect_ratio[1][0] = 2.0; wire_width = wire_pitch[1][0] / 2; @@ -2349,105 +2512,115 @@ void init_tech_params(double technology, bool is_tag) barrier_thickness = 0.006; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][0] = 0.405; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.734; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][1] = 0.405; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.734; 
vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; wire_width = wire_pitch[1][2] / 2; wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.77; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.734; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.065; wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); wire_r_per_micron[1][3] = 12 / 0.065; - } - else if (tech == 45) - { - //Aggressive projections. + } else if (tech == 45) { + // Aggressive projections. 
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; + aspect_ratio[0][0] = 3.0; wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; + wire_thickness = aspect_ratio[0][0] * wire_width; wire_spacing = wire_pitch[0][0] - wire_width; barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.315; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.958; - vert_dielectric_constant[0][0] = 3.9; + wire_r_per_micron[0][0] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.315; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.958; + vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; + aspect_ratio[0][1] = 3.0; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.315; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.958; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - 
ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); + wire_r_per_micron[0][1] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.315; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.958; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][2] = 0.63; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.958; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.958; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); + + // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; aspect_ratio[1][0] = 2.0; wire_width = wire_pitch[1][0] / 2; @@ -2456,32 +2629,36 @@ void 
init_tech_params(double technology, bool is_tag) barrier_thickness = 0.004; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][0] = 0.315; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.46; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][1] = 0.315; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.46; vert_dielectric_constant[1][1] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], 
horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2489,23 +2666,23 @@ void init_tech_params(double technology, bool is_tag) wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.55; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.46; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.045; wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); wire_r_per_micron[1][3] = 12 / 0.045; - } - else if (tech == 32) - { - //Aggressive projections. + } else if (tech == 32) { + // Aggressive projections. 
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; aspect_ratio[0][0] = 3.0; wire_width = wire_pitch[0][0] / 2; @@ -2514,48 +2691,54 @@ void init_tech_params(double technology, bool is_tag) barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][0] = 0.21; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 1.664; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 3.0; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][1] = 0.21; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 1.664; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); + wire_c_per_micron[0][1] = 
wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[0][2] = 0.42; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 1.664; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); - //Conservative projections + // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; aspect_ratio[1][0] = 2.0; wire_width = wire_pitch[1][0] / 2; @@ -2564,460 +2747,561 @@ void init_tech_params(double technology, bool is_tag) barrier_thickness = 0.003; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][0] = 0.21; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.214; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - 
wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; aspect_ratio[1][1] = 2.0; wire_width = wire_pitch[1][1] / 2; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][1] = 0.21; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.214; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; wire_width = wire_pitch[1][2] / 2; wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, 
dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.385; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.214; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.032;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron - wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.032; // micron + wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032); // F/micron + wire_r_per_micron[1][3] = 12 / 0.032; // ohm/micron + } else if (tech == 22) { + // Aggressive projections. 
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; // local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.15; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.414; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; // semi-global + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.15; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.414; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; // global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.3; + miller_value[0][2] = 1.5; + 
horiz_dielectric_constant[0][2] = 1.414; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); + + // //************************* + // wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][4] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][4] - wire_width; + // wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][4] = wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + // + // wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][5] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][5] - wire_width; + // wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][5] = wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + // + // wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][6] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][6] - wire_width; + // wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][6] 
= wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + //************************* + + // Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.003; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.15; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.104; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.15; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.104; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * 
wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.275; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.104; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.022; // micron + wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022); // F/micron + wire_r_per_micron[1][3] = 12 / 0.022; // ohm/micron + + //****************** + // wire_pitch[1][4] = 16 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][4] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][4] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][4] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // vert_dielectric_constant, fringe_cap); + // + // wire_pitch[1][5] = 24 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][5] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][5] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][5] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // 
vert_dielectric_constant, fringe_cap); + // + // wire_pitch[1][6] = 32 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][6] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][6] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][6] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // vert_dielectric_constant, fringe_cap); } - else if (tech == 22) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.15; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.414; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.15; - miller_value[0][1] = 1.5; - 
horiz_dielectric_constant[0][1] = 1.414; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.3; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.414; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// 
ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.15; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.104; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, 
alpha_scatter); - ild_thickness[1][1] = 0.15; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.104; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.275; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.104; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.022;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron - wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 
24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - - else if (tech == 16) - { - //Aggressive projections. 
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.108; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.202; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - aspect_ratio[0][1] = 3.0; - wire_width = wire_pitch[0][1] / 2; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.108; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.202; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.216; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.202; 
- vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, 
vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.002; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.108; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 1.998; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.108; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 1.998; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, 
alpha_scatter); - ild_thickness[1][2] = 0.198; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 1.998; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.016;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron - wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * 
wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - - g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.ild_thickness += curr_alpha * 
ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - - g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - - g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2; - - g_tp.sense_delay += curr_alpha *SENSE_AMP_D; - g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P; -// g_tp.horiz_dielectric_constant += horiz_dielectric_constant; -// g_tp.vert_dielectric_constant += vert_dielectric_constant; -// g_tp.aspect_ratio += aspect_ratio; -// g_tp.miller_value += miller_value; -// g_tp.ild_thickness += ild_thickness; + else if (tech == 16) { + // Aggressive projections. 
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; // local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.108; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.202; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], + miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; // semi-global + aspect_ratio[0][1] = 3.0; + wire_width = wire_pitch[0][1] / 2; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.108; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.202; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], + miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; // global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = + wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.216; + miller_value[0][2] = 1.5; + 
horiz_dielectric_constant[0][2] = 1.202; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], + miller_value[0][2], horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], fringe_cap); + + // //************************* + // wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][4] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][4] - wire_width; + // wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][4] = wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + // + // wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][5] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][5] - wire_width; + // wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][5] = wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + // + // wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global + // aspect_ratio = 3.0; + // wire_width = wire_pitch[0][6] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[0][6] - wire_width; + // wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, + // wire_width, + // wire_thickness, barrier_thickness, + // dishing_thickness, alpha_scatter); + // ild_thickness = 0.3; + // wire_c_per_micron[0][6] 
= wire_capacitance(wire_width, + // wire_thickness, wire_spacing, + // ild_thickness, miller_value, + // horiz_dielectric_constant, vert_dielectric_constant, + // fringe_cap); + //************************* + + // Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.002; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.108; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 1.998; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], + miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.108; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 1.998; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], + miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * 
wire_thickness; + wire_r_per_micron[1][2] = + wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, + barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.198; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 1.998; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance( + wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], + miller_value[1][2], horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], fringe_cap); + // Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.016; // micron + wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016); // F/micron + wire_r_per_micron[1][3] = 12 / 0.016; // ohm/micron + + //****************** + // wire_pitch[1][4] = 16 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][4] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][4] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][4] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // vert_dielectric_constant, fringe_cap); + // + // wire_pitch[1][5] = 24 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][5] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][5] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][5] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // 
vert_dielectric_constant, fringe_cap); + // + // wire_pitch[1][6] = 32 * g_ip.F_sz_um; + // aspect_ratio = 2.2; + // wire_width = wire_pitch[1][6] / 2; + // wire_thickness = aspect_ratio * wire_width; + // wire_spacing = wire_pitch[1][6] - wire_width; + // dishing_thickness = 0.1 * wire_thickness; + // wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, + // wire_width, wire_thickness, + // barrier_thickness, dishing_thickness, alpha_scatter); + // ild_thickness = 0.275; wire_c_per_micron[1][6] = + // wire_capacitance(wire_width, wire_thickness, wire_spacing, + // ild_thickness, miller_value, horiz_dielectric_constant, + // vert_dielectric_constant, fringe_cap); + } + g_tp.wire_local.pitch += + curr_alpha * wire_pitch[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.R_per_um += + curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.C_per_um += + curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.aspect_ratio += + curr_alpha * aspect_ratio[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.ild_thickness += + curr_alpha * ild_thickness[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.miller_value += + curr_alpha * miller_value[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.horiz_dielectric_constant += + curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.vert_dielectric_constant += + curr_alpha * + vert_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 
3 : 0]; + + g_tp.wire_inside_mat.pitch += + curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.R_per_um += + curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.C_per_um += + curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.aspect_ratio += + curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.ild_thickness += + curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.miller_value += + curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.horiz_dielectric_constant += + curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.vert_dielectric_constant += + curr_alpha * + vert_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + + g_tp.wire_outside_mat.pitch += + curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.R_per_um += + curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.C_per_um += + curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.aspect_ratio += + curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.ild_thickness += + curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.miller_value += + curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.horiz_dielectric_constant += + curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.vert_dielectric_constant += + curr_alpha * + vert_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + + g_tp.unit_len_wire_del = + g_tp.wire_inside_mat.R_per_um * 
g_tp.wire_inside_mat.C_per_um / 2; + + g_tp.sense_delay += curr_alpha * SENSE_AMP_D; + g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P; + // g_tp.horiz_dielectric_constant += horiz_dielectric_constant; + // g_tp.vert_dielectric_constant += vert_dielectric_constant; + // g_tp.aspect_ratio += aspect_ratio; + // g_tp.miller_value += miller_value; + // g_tp.ild_thickness += ild_thickness; } g_tp.fringe_cap = fringe_cap; @@ -3028,9 +3312,9 @@ void init_tech_params(double technology, bool is_tag) g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); double KLOAD = 1; c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, + g_tp.cell_h_def) + gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); tf = rd * c_load; g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); } - diff --git a/cacti/uca.cc b/cacti/uca.cc old mode 100755 new mode 100644 index df1671b..4972549 --- a/cacti/uca.cc +++ b/cacti/uca.cc @@ -29,413 +29,448 @@ * ***************************************************************************/ - +#include "uca.h" #include #include -#include "uca.h" - - -UCA::UCA(const DynamicParameter & dyn_p) - :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) -{ - int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); - int num_banks_hor_dir = nbanks/num_banks_ver_dir; - - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; +UCA::UCA(const DynamicParameter &dyn_p) + : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { + int num_banks_ver_dir = + 1 << ((bank.area.h > bank.area.w) ? 
_log2(nbanks) / 2 + : (_log2(nbanks) - _log2(nbanks) / 2)); + int num_banks_hor_dir = nbanks / num_banks_ver_dir; + + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; } - num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); - num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); - num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; - num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; + num_addr_b_bank = + (dp.number_addr_bits_mat + dp.number_subbanks_decode) * (RWP + ERP + EWP); + num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); + num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); + num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; + num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; - if (!dp.fully_assoc && !dp.pure_cam) - { + if (!dp.fully_assoc && !dp.pure_cam) { - if (g_ip->fast_access && dp.is_tag == false) - { - num_do_b_bank *= g_ip->data_assoc; - } - - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - } + if (g_ip->fast_access && dp.is_tag == false) { + num_do_b_bank *= g_ip->data_assoc; + } - else - { + htree_in_add = + new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, + num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, + 
num_banks_hor_dir * 2, Add_htree, true); + htree_in_data = + new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, + num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_data = + new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, + num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + } - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); + else { + + htree_in_add = new Htree2( + g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Add_htree, true); + htree_in_data = new Htree2( + g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_data = new Htree2( + g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, 
num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + htree_in_search = new Htree2( + g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_search = new Htree2( + g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); } area.w = htree_in_data->area.w; area.h = htree_in_data->area.h; - area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; -// cout<<"area cell"<nbanks; + // cout<<"area cell"<delay + bank.htree_in_add->delay; - double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; + double max_delay_before_row_decoder = + delay_array_to_mat + bank.mat.r_predec->delay; delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_1_predec->delay + - bank.mat.sa_mux_lev_1_dec->delay; + bank.mat.sa_mux_lev_1_predec->delay + + bank.mat.sa_mux_lev_1_dec->delay; delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_2_predec->delay + - bank.mat.sa_mux_lev_2_dec->delay; - double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + bank.mat.sa_mux_lev_2_predec->delay + + bank.mat.sa_mux_lev_2_dec->delay; + double delay_inside_mat = + bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; delay_before_subarray_output_driver = - MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path - delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path - MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path - delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path + 
MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path + delay_array_to_mat + bank.mat.b_mux_predec->delay + + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path + MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path + delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + - bank.htree_out_data->delay + htree_out_data->delay; - access_time = bank.mat.delay_comparator; + bank.htree_out_data->delay + + htree_out_data->delay; + access_time = bank.mat.delay_comparator; double ram_delay_inside_mat; - if (dp.fully_assoc) - { - //delay of FA contains both CAM tag and RAM data - { //delay of CAM - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + if (dp.fully_assoc) { + // delay of FA contains both CAM tag and RAM data + { // delay of CAM + ram_delay_inside_mat = + bank.mat.delay_bitline + bank.mat.delay_matchchline; access_time = htree_in_add->delay + bank.htree_in_add->delay; - //delay of fully-associative data array + // delay of fully-associative data array access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; } - } - else - { - access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path + } else { + access_time = delay_before_subarray_output_driver + + delay_from_subarray_out_drv_to_out; // data_acc_path } - if (dp.is_main_mem) - { - double t_rcd = max_delay_before_row_decoder + delay_inside_mat; - double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + + if (dp.is_main_mem) { + double t_rcd = max_delay_before_row_decoder + delay_inside_mat; + double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, + delay_array_to_sa_mux_lev_2_decoder) + delay_from_subarray_out_drv_to_out; access_time = t_rcd + cas_latency; } double temp; - if (!dp.fully_assoc) - { - temp = delay_inside_mat + bank.mat.delay_wl_reset + 
bank.mat.delay_bl_restore;//TODO: Sheng: revisit - if (dp.is_dram) - { - temp += bank.mat.delay_writeback; // temp stores random cycle time + if (!dp.fully_assoc) { + temp = delay_inside_mat + bank.mat.delay_wl_reset + + bank.mat.delay_bl_restore; // TODO: Sheng: revisit + if (dp.is_dram) { + temp += bank.mat.delay_writeback; // temp stores random cycle time } - - temp = MAX(temp, bank.mat.r_predec->delay); - temp = MAX(temp, bank.mat.b_mux_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + temp = MAX(temp, bank.mat.r_predec->delay); + temp = MAX(temp, bank.mat.b_mux_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } else { + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore + + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; + + temp = MAX( + temp, + bank.mat.b_mux_predec->delay); // TODO: Sheng revisit whether + // distinguish cam and ram bitline etc. + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); } - else - { - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; - temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore - + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; - - temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); - } - - // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav - if (g_ip->rpters_in_htree == false) - { + + // The following is true only if the input parameter "repeaters_in_htree" is + // set to false --Nav + if (g_ip->rpters_in_htree == false) { temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); } cycle_time = temp; double delay_req_network = max_delay_before_row_decoder; double delay_rep_network = delay_from_subarray_out_drv_to_out; - multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); + multisubbank_interleave_cycle_time = + MAX(delay_req_network, delay_rep_network); - if (dp.is_main_mem) - { + if (dp.is_main_mem) { multisubbank_interleave_cycle_time = htree_in_add->delay; - precharge_delay = htree_in_add->delay + - bank.htree_in_add->delay + bank.mat.delay_writeback + - bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; + precharge_delay = htree_in_add->delay + bank.htree_in_add->delay + + bank.mat.delay_writeback + bank.mat.delay_wl_reset + + bank.mat.delay_bl_restore; cycle_time = access_time + precharge_delay; - } - else - { + } else { precharge_delay = 0; } double dram_array_availability = 0; - if (dp.is_dram) - { - dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; + if (dp.is_dram) { + dram_array_availability = + (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; } return outrisetime; } - - // note: currently, power numbers are for a bank of an array -void UCA::compute_power_energy() -{ +void UCA::compute_power_energy() { bank.compute_power_energy(); power = bank.power; - power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; - power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + 
htree_in_data->power.readOp.dynamic; - if (dp.fully_assoc || dp.pure_cam) - { - power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; + power_routing_to_bank.readOp.dynamic = + htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; + power_routing_to_bank.writeOp.dynamic = + htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; + if (dp.fully_assoc || dp.pure_cam) { + power_routing_to_bank.searchOp.dynamic = + htree_in_search->power.searchOp.dynamic + + htree_out_search->power.searchOp.dynamic; } power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + htree_in_data->power.readOp.leakage + htree_out_data->power.readOp.leakage; - power_routing_to_bank.readOp.power_gated_leakage += htree_in_add->power.readOp.power_gated_leakage + - htree_in_data->power.readOp.power_gated_leakage + - htree_out_data->power.readOp.power_gated_leakage; - - power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + - htree_in_data->power.readOp.gate_leakage + - htree_out_data->power.readOp.gate_leakage; - if (dp.fully_assoc || dp.pure_cam) - { - power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; - power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; + power_routing_to_bank.readOp.power_gated_leakage += + htree_in_add->power.readOp.power_gated_leakage + + htree_in_data->power.readOp.power_gated_leakage + + htree_out_data->power.readOp.power_gated_leakage; + + power_routing_to_bank.readOp.gate_leakage += + htree_in_add->power.readOp.gate_leakage + + htree_in_data->power.readOp.gate_leakage + + htree_out_data->power.readOp.gate_leakage; + if (dp.fully_assoc || dp.pure_cam) { + power_routing_to_bank.readOp.leakage += + htree_in_search->power.readOp.leakage + + 
htree_out_search->power.readOp.leakage; + power_routing_to_bank.readOp.gate_leakage += + htree_in_search->power.readOp.gate_leakage + + htree_out_search->power.readOp.gate_leakage; } power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; power.readOp.leakage += power_routing_to_bank.readOp.leakage; - power.readOp.power_gated_leakage += power_routing_to_bank.readOp.power_gated_leakage; + power.readOp.power_gated_leakage += + power_routing_to_bank.readOp.power_gated_leakage; power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; // calculate total write energy per access - power.writeOp.dynamic = power.readOp.dynamic - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - - power_routing_to_bank.readOp.dynamic - + power_routing_to_bank.writeOp.dynamic - + bank.htree_in_data->power.readOp.dynamic - - bank.htree_out_data->power.readOp.dynamic; - - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + power.writeOp.dynamic = + power.readOp.dynamic - + bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - + power_routing_to_bank.readOp.dynamic + + power_routing_to_bank.writeOp.dynamic + + bank.htree_in_data->power.readOp.dynamic - + bank.htree_out_data->power.readOp.dynamic; + + if (dp.is_dram == false) { + power.writeOp.dynamic -= + bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; } dyn_read_energy_from_closed_page = power.readOp.dynamic; - dyn_read_energy_from_open_page = power.readOp.dynamic - - (bank.mat.r_predec->power.readOp.dynamic + - bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; + 
dyn_read_energy_from_open_page = + power.readOp.dynamic - + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic + + bank.mat.power_bitline.readOp.dynamic) * + dp.num_act_mats_hor_dir; dyn_read_energy_remaining_words_in_burst = - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * - ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - power_routing_to_bank.readOp.dynamic); + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * + ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * + dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + power_routing_to_bank.readOp.dynamic); dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; - dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; + dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; activate_energy = htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + + bank.htree_in_add->power_bit.readOp.dynamic * + bank.num_addr_b_routed_to_mat_for_act + (bank.mat.r_predec->power.readOp.dynamic + bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; - read_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * 
bank.num_addr_b_routed_to_mat_for_rd_or_wr + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - htree_in_data->power.readOp.dynamic) * g_ip->burst_len; - write_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + - htree_in_data->power.readOp.dynamic + - bank.htree_in_data->power.readOp.dynamic + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; + bank.mat.power_sa.readOp.dynamic) * + dp.num_act_mats_hor_dir; + read_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * + bank.num_addr_b_routed_to_mat_for_rd_or_wr + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * + dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + htree_in_data->power.readOp.dynamic) * + g_ip->burst_len; + write_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * + bank.num_addr_b_routed_to_mat_for_rd_or_wr + + htree_in_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + 
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * + dp.num_act_mats_hor_dir) * + g_ip->burst_len; precharge_energy = (bank.mat.power_bitline.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * + dp.num_act_mats_hor_dir; - //The follow 6 parameters are only used in DRAM/eDRAM output for now + // The follow 6 parameters are only used in DRAM/eDRAM output for now leak_power_subbank_closed_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_closed_page_state) * + dp.num_act_mats_hor_dir; leak_power_subbank_closed_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; 
//+ - //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * + dp.num_act_mats_hor_dir; //+ + // bank.mat.leak_power_sense_amps_closed_page_state) * + // dp.num_act_mats_hor_dir; leak_power_subbank_open_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_open_page_state) * + dp.num_act_mats_hor_dir; leak_power_subbank_open_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - 
bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; - //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * + dp.num_act_mats_hor_dir; + // bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; leak_power_request_and_reply_networks = - power_routing_to_bank.readOp.leakage + - bank.htree_in_add->power.readOp.leakage + - bank.htree_in_data->power.readOp.leakage + - bank.htree_out_data->power.readOp.leakage; + power_routing_to_bank.readOp.leakage + + bank.htree_in_add->power.readOp.leakage + + bank.htree_in_data->power.readOp.leakage + + bank.htree_out_data->power.readOp.leakage; leak_power_request_and_reply_networks += - power_routing_to_bank.readOp.gate_leakage + - bank.htree_in_add->power.readOp.gate_leakage + - bank.htree_in_data->power.readOp.gate_leakage + - bank.htree_out_data->power.readOp.gate_leakage; - - if (dp.fully_assoc || dp.pure_cam) - { - leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; - leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; + power_routing_to_bank.readOp.gate_leakage + + bank.htree_in_add->power.readOp.gate_leakage + + bank.htree_in_data->power.readOp.gate_leakage + + bank.htree_out_data->power.readOp.gate_leakage; + + if (dp.fully_assoc || 
dp.pure_cam) { + leak_power_request_and_reply_networks += + htree_in_search->power.readOp.leakage + + htree_out_search->power.readOp.leakage; + leak_power_request_and_reply_networks += + htree_in_search->power.readOp.gate_leakage + + htree_out_search->power.readOp.gate_leakage; } - - if (dp.is_dram) - { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power - refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + - bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; + if (dp.is_dram) { // if DRAM, add contribution of power spent in row + // predecoder drivers, blocks and decoders to refresh power + refresh_power = + (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.row_dec->power.readOp.dynamic) * + dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * + dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * + dp.num_act_mats_hor_dir; refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; refresh_power /= dp.dram_refresh_period; } -// The follow 6 parameters are only used in DRAM/eDRAM output for now - - if (dp.is_tag == false) - { - power.readOp.dynamic = dyn_read_energy_from_closed_page; - power.writeOp.dynamic = dyn_read_energy_from_closed_page - - dyn_read_energy_remaining_words_in_burst - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - + (power_routing_to_bank.writeOp.dynamic - + // The follow 6 parameters are only used in DRAM/eDRAM output for now + + if (dp.is_tag == false) { + 
power.readOp.dynamic = dyn_read_energy_from_closed_page; + power.writeOp.dynamic = + dyn_read_energy_from_closed_page - + dyn_read_energy_remaining_words_in_burst - + bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + + (power_routing_to_bank.writeOp.dynamic - power_routing_to_bank.readOp.dynamic - bank.htree_out_data->power.readOp.dynamic + bank.htree_in_data->power.readOp.dynamic) * - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); // FIXME - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + if (dp.is_dram == false) { + power.writeOp.dynamic -= + bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; } } // if DRAM, add refresh power to total leakage - if (dp.is_dram) - { + if (dp.is_dram) { power.readOp.leakage += refresh_power; } // TODO: below should be avoided. 
/*if (dp.is_main_mem) { - power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; + power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * + g_tp.peri_global.Vdd / g_ip->nbanks; }*/ - assert(power.readOp.dynamic > 0); + assert(power.readOp.dynamic > 0); assert(power.writeOp.dynamic > 0); - assert(power.readOp.leakage > 0); + assert(power.readOp.leakage > 0); } - diff --git a/cacti/uca.h b/cacti/uca.h old mode 100755 new mode 100644 index ae03c1f..3e81686 --- a/cacti/uca.h +++ b/cacti/uca.h @@ -29,70 +29,65 @@ * ***************************************************************************/ - - #ifndef __UCA_H__ #define __UCA_H__ #include "area.h" #include "bank.h" #include "component.h" -#include "parameter.h" #include "htree2.h" +#include "parameter.h" - -class UCA : public Component -{ - public: - UCA(const DynamicParameter & dyn_p); - ~UCA(); - double compute_delays(double inrisetime); // returns outrisetime - void compute_power_energy(); - - DynamicParameter dp; - Bank bank; - - Htree2 * htree_in_add; - Htree2 * htree_in_data; - Htree2 * htree_out_data; - Htree2 * htree_in_search; - Htree2 * htree_out_search; - - powerDef power_routing_to_bank; - - uint32_t nbanks; - - int num_addr_b_bank; - int num_di_b_bank; - int num_do_b_bank; - int num_si_b_bank; - int num_so_b_bank; - int RWP, ERP, EWP,SCHP; - double area_all_dataramcells; - - double dyn_read_energy_from_closed_page; - double dyn_read_energy_from_open_page; - double dyn_read_energy_remaining_words_in_burst; - - double refresh_power; // only for DRAM - double activate_energy; - double read_energy; - double write_energy; - double precharge_energy; - double leak_power_subbank_closed_page; - double leak_power_subbank_open_page; - double leak_power_request_and_reply_networks; - - double delay_array_to_sa_mux_lev_1_decoder; - double delay_array_to_sa_mux_lev_2_decoder; - double delay_before_subarray_output_driver; - double 
delay_from_subarray_out_drv_to_out; - double access_time; - double precharge_delay; - double multisubbank_interleave_cycle_time; - double long_channel_leakage_reduction_periperal; - double long_channel_leakage_reduction_memcell; +class UCA : public Component { +public: + UCA(const DynamicParameter &dyn_p); + ~UCA(); + double compute_delays(double inrisetime); // returns outrisetime + void compute_power_energy(); + + DynamicParameter dp; + Bank bank; + + Htree2 *htree_in_add; + Htree2 *htree_in_data; + Htree2 *htree_out_data; + Htree2 *htree_in_search; + Htree2 *htree_out_search; + + powerDef power_routing_to_bank; + + uint32_t nbanks; + + int num_addr_b_bank; + int num_di_b_bank; + int num_do_b_bank; + int num_si_b_bank; + int num_so_b_bank; + int RWP, ERP, EWP, SCHP; + double area_all_dataramcells; + + double dyn_read_energy_from_closed_page; + double dyn_read_energy_from_open_page; + double dyn_read_energy_remaining_words_in_burst; + + double refresh_power; // only for DRAM + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; + + double delay_array_to_sa_mux_lev_1_decoder; + double delay_array_to_sa_mux_lev_2_decoder; + double delay_before_subarray_output_driver; + double delay_from_subarray_out_drv_to_out; + double access_time; + double precharge_delay; + double multisubbank_interleave_cycle_time; + double long_channel_leakage_reduction_periperal; + double long_channel_leakage_reduction_memcell; }; #endif - diff --git a/cacti/version_cacti.h b/cacti/version_cacti.h index 80931d5..b43ea04 100644 --- a/cacti/version_cacti.h +++ b/cacti/version_cacti.h @@ -34,10 +34,11 @@ #include -#define VER_MAJOR_CACTI 6 /* CACTI-P and DVS */ -#define VER_MINOR_CACTI 5 -const string VER_postfix_CACTI = "-P"; -#define VER_COMMENT_CACTI "CAM and fully associative cache, power gating, and DVS" -#define 
VER_UPDATE_CACTI "June, 2014" +#define VER_MAJOR_CACTI 6 /* CACTI-P and DVS */ +#define VER_MINOR_CACTI 5 +const string VER_postfix_CACTI = "-P"; +#define VER_COMMENT_CACTI \ + "CAM and fully associative cache, power gating, and DVS" +#define VER_UPDATE_CACTI "June, 2014" #endif /* VERSION_H_ */ diff --git a/cacti/wire.cc b/cacti/wire.cc index 05c6de9..432341c 100644 --- a/cacti/wire.cc +++ b/cacti/wire.cc @@ -30,24 +30,18 @@ ***************************************************************************/ #include "wire.h" + #include "cmath" // use this constructor to calculate wire stats -Wire::Wire( - enum Wire_type wire_model, - double wl, - int n, - double w_s, - double s_s, - enum Wire_placement wp, - double resistivity, - TechnologyParameter::DeviceType *dt - ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s), - resistivity(resistivity), deviceType(dt) -{ +Wire::Wire(enum Wire_type wire_model, double wl, int n, double w_s, double s_s, + enum Wire_placement wp, double resistivity, + TechnologyParameter::DeviceType *dt) + : wt(wire_model), wire_length(wl * 1e-6), nsense(n), w_scale(w_s), + s_scale(s_s), resistivity(resistivity), deviceType(dt) { wire_placement = wp; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - in_rise_time = 0; - out_rise_time = 0; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; if (initialized != 1) { cout << "Wire not initialized. 
Initializing it with default values\n"; Wire winit; @@ -55,57 +49,63 @@ Wire::Wire( calculate_wire_stats(); // change everything back to seconds, microns, and Joules repeater_spacing *= 1e6; - wire_length *= 1e6; - wire_width *= 1e6; - wire_spacing *= 1e6; + wire_length *= 1e6; + wire_width *= 1e6; + wire_spacing *= 1e6; assert(wire_length > 0); assert(power.readOp.dynamic > 0); assert(power.readOp.leakage > 0); assert(power.readOp.gate_leakage > 0); } - // the following values are for peripheral global technology - // specified in the input config file - Component Wire::global; - Component Wire::global_5; - Component Wire::global_10; - Component Wire::global_20; - Component Wire::global_30; - Component Wire::low_swing; - - int Wire::initialized; - double Wire::wire_width_init; - double Wire::wire_spacing_init; - double Wire::repeater_size_init; // value used in initialization should not be reused in final output - double Wire::repeater_spacing_init; - - -Wire::Wire(double w_s, double s_s, /*bool reset_repeater_sizing,*/ enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt) -{ - w_scale = w_s; - s_scale = s_s; - deviceType = dt; +// the following values are for peripheral global technology +// specified in the input config file +Component Wire::global; +Component Wire::global_5; +Component Wire::global_10; +Component Wire::global_20; +Component Wire::global_30; +Component Wire::low_swing; + +int Wire::initialized; +double Wire::wire_width_init; +double Wire::wire_spacing_init; +double Wire::repeater_size_init; // value used in initialization should not be + // reused in final output +double Wire::repeater_spacing_init; + +Wire::Wire(double w_s, double s_s, + /*bool reset_repeater_sizing,*/ enum Wire_placement wp, double resis, + TechnologyParameter::DeviceType *dt) { + w_scale = w_s; + s_scale = s_s; + deviceType = dt; wire_placement = wp; - resistivity = resis; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; - 
in_rise_time = 0; - out_rise_time = 0; + resistivity = resis; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; - switch (wire_placement) - { - case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break; - case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break; - default: wire_width = g_tp.wire_local.pitch; break; + switch (wire_placement) { + case outside_mat: + wire_width = g_tp.wire_outside_mat.pitch; + break; + case inside_mat: + wire_width = g_tp.wire_inside_mat.pitch; + break; + default: + wire_width = g_tp.wire_local.pitch; + break; } wire_spacing = wire_width; - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; initialized = 1; init_wire(); - //init_wire(reset_repeater_sizing); + // init_wire(reset_repeater_sizing); wire_width_init = wire_width; wire_spacing_init = wire_spacing; @@ -114,126 +114,120 @@ Wire::Wire(double w_s, double s_s, /*bool reset_repeater_sizing,*/ enum Wire_pla assert(power.readOp.gate_leakage > 0); } +Wire::~Wire() {} - -Wire::~Wire() -{ -} - - - -void -Wire::calculate_wire_stats() -{ +void Wire::calculate_wire_stats() { if (wire_placement == outside_mat) { wire_width = g_tp.wire_outside_mat.pitch; - } - else if (wire_placement == inside_mat) { + } else if (wire_placement == inside_mat) { wire_width = g_tp.wire_inside_mat.pitch; - } - else { + } else { wire_width = g_tp.wire_local.pitch; } wire_spacing = wire_width; - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; - + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; if (wt != Low_swing) { - // delay_optimal_wire(); - - if (wt == Global) { - delay = global.delay * wire_length; - power.readOp.dynamic = global.power.readOp.dynamic * wire_length; - power.readOp.leakage = 
global.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; - repeater_spacing = global.area.w; - repeater_size = global.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_5) { - delay = global_5.delay * wire_length; - power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_5.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_5.area.w; - repeater_size = global_5.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_10) { - delay = global_10.delay * wire_length; - power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_10.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_10.area.w; - repeater_size = global_10.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_20) { - delay = global_20.delay * wire_length; - power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_20.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_20.area.w; - repeater_size = global_20.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_30) { - delay = 
global_30.delay * wire_length; - power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_30.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_30.area.w; - repeater_size = global_30.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - out_rise_time = delay*repeater_spacing/deviceType->Vth; + // delay_optimal_wire(); + + if (wt == Global) { + delay = global.delay * wire_length; + power.readOp.dynamic = global.power.readOp.dynamic * wire_length; + power.readOp.leakage = global.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = + global.power.readOp.gate_leakage * wire_length; + repeater_spacing = global.area.w; + repeater_size = global.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_5) { + delay = global_5.delay * wire_length; + power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_5.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = + global_5.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_5.area.w; + repeater_size = global_5.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_10) { + delay = global_10.delay * wire_length; + power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_10.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = + global_10.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_10.area.w; + repeater_size = global_10.area.h; + 
area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_20) { + delay = global_20.delay * wire_length; + power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_20.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = + global_20.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_20.area.w; + repeater_size = global_20.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_30) { + delay = global_30.delay * wire_length; + power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_30.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = + global_30.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_30.area.w; + repeater_size = global_30.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } + out_rise_time = delay * repeater_spacing / deviceType->Vth; - } - else if (wt == Low_swing) { - low_swing_model (); + } else if (wt == Low_swing) { + low_swing_model(); repeater_spacing = wire_length; repeater_size = 1; - } - else { + } else { assert(0); } -// if (g_ip->interconect_power_gated)//TODO:actual sleep txs need to be added as in the wordline drivers, -// //but since wires have enough space underneath for placement and routing of the sleep tx, the area overhead should be very small. -// //performance loss and energy overhead is also very small because of the property of sleep tx. 
-// { -// power.readOp.leakage = power.readOp.leakage/deviceType->Vdd*deviceType->Vcc_min; -// } - power.readOp.power_gated_leakage = power.readOp.leakage/deviceType->Vdd*deviceType->Vcc_min;//TODO: + // if (g_ip->interconect_power_gated)//TODO:actual sleep txs need to be added + // as in the wordline drivers, + // //but since wires have enough space underneath for placement and + // routing of the sleep tx, the area overhead should be very small. + // //performance loss and energy overhead is also very small + // because of the property of sleep tx. + // { + // power.readOp.leakage = + // power.readOp.leakage/deviceType->Vdd*deviceType->Vcc_min; + // } + power.readOp.power_gated_leakage = + power.readOp.leakage / deviceType->Vdd * deviceType->Vcc_min; // TODO: } - - /* * The fall time of an input signal to the first stage of a circuit is * assumed to be same as the fall time of the output signal of two * inverters connected in series (refer: CACTI 1 Technical report, * section 6.1.3) */ - double -Wire::signal_fall_time () -{ +double Wire::signal_fall_time() { /* rise time of inverter 1's output */ double rt; @@ -242,22 +236,23 @@ Wire::signal_fall_time () double timeconst; timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + rt = horowitz(0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - 
tr_R_on(g_tp.min_w_nmos_, NCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + ft = horowitz(rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / + deviceType->Vth; return ft; } - - -double Wire::signal_rise_time () -{ +double Wire::signal_rise_time() { /* rise time of inverter 1's output */ double ft; @@ -266,20 +261,22 @@ double Wire::signal_rise_time () double timeconst; timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(g_tp.min_w_nmos_, NCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + rt = horowitz(0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / + deviceType->Vth; timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); - return ft; //sec + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + ft = horowitz(rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); + return ft; // sec } - - /* Wire resistance and capacitance calculations * wire width * @@ -294,111 +291,106 @@ double 
Wire::signal_rise_time () * */ -double Wire::wire_cap (double len /* in m */, bool call_from_outside) -{ - //TODO: this should be consistent with the wire_res in technology file +double Wire::wire_cap(double len /* in m */, bool call_from_outside) { + // TODO: this should be consistent with the wire_res in technology file double sidewall, adj, tot_cap; double wire_height; double epsilon0 = 8.8542e-12; - double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness; - - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_outside_mat.miller_value; - ild_thickness = g_tp.wire_outside_mat.ild_thickness; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_inside_mat.miller_value; - ild_thickness = g_tp.wire_inside_mat.ild_thickness; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; - miller_value = g_tp.wire_local.miller_value; - ild_thickness = g_tp.wire_local.ild_thickness; - break; - } + double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, + miller_value, ild_thickness; + + switch (wire_placement) { + case outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + horiz_dielectric_constant = + g_tp.wire_outside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_outside_mat.miller_value; + ild_thickness = 
g_tp.wire_outside_mat.ild_thickness; + break; + } + case inside_mat: { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + horiz_dielectric_constant = + g_tp.wire_inside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_inside_mat.miller_value; + ild_thickness = g_tp.wire_inside_mat.ild_thickness; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; + miller_value = g_tp.wire_local.miller_value; + ild_thickness = g_tp.wire_local.ild_thickness; + break; + } } - if (call_from_outside) - { - wire_width *= 1e-6; - wire_spacing *= 1e-6; + if (call_from_outside) { + wire_width *= 1e-6; + wire_spacing *= 1e-6; } - wire_height = wire_width/w_scale*aspect_ratio; + wire_height = wire_width / w_scale * aspect_ratio; /* * assuming height does not change. 
wire_width = width_original*w_scale * So wire_height does not change as wire width increases */ -// capacitance between wires in the same level -// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) -// * epsilon0; - - sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) - * epsilon0; + // capacitance between wires in the same level + // sidewall = 2*miller_value * horiz_dielectric_constant * + // (wire_height/wire_spacing) + // * epsilon0; + sidewall = miller_value * horiz_dielectric_constant * + (wire_height / wire_spacing) * epsilon0; // capacitance between wires in adjacent levels - //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; - //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; + // adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; + // adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * + // epsilon0; - adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; - //Change ild_thickness from micron to M + adj = miller_value * vert_dielectric_constant * wire_width / + (ild_thickness * 1e-6) * epsilon0; + // Change ild_thickness from micron to M - //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m - tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m + // tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m + tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); // F/m - if (call_from_outside) - { - wire_width *= 1e6; - wire_spacing *= 1e6; + if (call_from_outside) { + wire_width *= 1e6; + wire_spacing *= 1e6; } - return (tot_cap*len); // (F) + return (tot_cap * len); // (F) } +double Wire::wire_res(double len /*(in m)*/) { + + double aspect_ratio, alpha_scatter = 1.05, dishing_thickness = 0, + barrier_thickness = 0; + // TODO: this should be consistent with the wire_res in technology file + // The whole computation should be consistent with the 
wire_res in + // technology.cc too! - double -Wire::wire_res (double len /*(in m)*/) -{ - - double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0; - //TODO: this should be consistent with the wire_res in technology file - //The whole computation should be consistent with the wire_res in technology.cc too! - - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - break; - } - } - return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)* - (wire_width-2*barrier_thickness))); + switch (wire_placement) { + case outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + break; + } + case inside_mat: { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + break; + } + } + return (alpha_scatter * resistivity * 1e-6 * len / + ((aspect_ratio * wire_width / w_scale - dishing_thickness - + barrier_thickness) * + (wire_width - 2 * barrier_thickness))); } /* @@ -408,13 +400,10 @@ Wire::wire_res (double len /*(in m)*/) * low swing nmos delay, and the wire delay * (ref: Technical report 6) */ - void -Wire::low_swing_model() -{ +void Wire::low_swing_model() { double len = wire_length; double beta = pmos_to_nmos_sz_ratio(); - double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time; /* Final nmos low swing driver size calculation: @@ -433,112 +422,117 @@ Wire::low_swing_model() #define RES_ADJ (8.6) // Increase in resistance due to low driving vol. 
- double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ; + double driver_res = (-8 * g_tp.FO4 / (log(0.5) * cwire)) / RES_ADJ; double nsize = R_to_w(driver_res, NCH); nsize = MIN(nsize, g_tp.max_w_nmos_); nsize = MAX(nsize, g_tp.min_w_nmos_); - if(rwire*cwire > 8*g_tp.FO4) - { + if (rwire * cwire > 8 * g_tp.FO4) { nsize = g_tp.max_w_nmos_; } // size the inverter appropriately to minimize the transmitter delay - // Note - In order to minimize leakage, we are not adding a set of inverters to - // bring down delay. Instead, we are sizing the single gate - // based on the logical effort. - double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0) - + gate_C(2*min_w_pmos, 0))); - double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff; - double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)); + // Note - In order to minimize leakage, we are not adding a set of inverters + // to bring down delay. Instead, we are sizing the single gate based on the + // logical effort. 
+ double st_eff = + sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) / + (gate_C(2 * g_tp.min_w_nmos_, 0) + gate_C(2 * min_w_pmos, 0))); + double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff; + double inv_size = + req_cin / (gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)); inv_size = MAX(inv_size, 1); /* nand gate delay */ double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(inv_size*g_tp.min_w_nmos_, 0) + - gate_C(inv_size*min_w_pmos, 0); + drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(inv_size * g_tp.min_w_nmos_, 0) + + gate_C(inv_size * min_w_pmos, 0); double timeconst = res_eq * cap_eq; - delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, RISE); - double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd; + delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); + double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd; - inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ + inputrise = + delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ /* Inverter delay: * The load capacitance of this inv depends on * the gate capacitance of the final stage nmos * transistor which in turn depends on nsize */ - res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1); - cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(nsize, 0); + res_eq = tr_R_on(inv_size * min_w_pmos, PCH, 1); + cap_eq = drain_C_(inv_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(nsize, 0); timeconst = res_eq * cap_eq; - delay += horowitz(inputrise, timeconst, 
deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, FALL); - temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd; - + delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL); + temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd; transmitter.delay = delay; - transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/ - transmitter.power.readOp.leakage = deviceType->Vdd * - (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); - - transmitter.power.readOp.gate_leakage = deviceType->Vdd * - (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + transmitter.power.readOp.dynamic = + temp_power * 2; /* since it is a diff. model*/ + transmitter.power.readOp.leakage = + deviceType->Vdd * + (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + + transmitter.power.readOp.gate_leakage = + deviceType->Vdd * + (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); inputrise = delay / deviceType->Vth; /* nmos delay + wire delay */ - cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 + - nsense * sense_amp_input_cap(); //+receiver cap + cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 + + nsense * sense_amp_input_cap(); //+receiver cap /* * NOTE: nmos is used as both pull up and pull down transistor * in the transmitter. 
This is because for low voltage swing, drive * resistance of nmos is less than pmos * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) */ - timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire + - drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) + - rwire*cwire/2 + - (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) * - nsense * sense_amp_input_cap(); + timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * + (cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) + + rwire * cwire / 2 + + (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) * nsense * + sense_amp_input_cap(); /* * since we are pre-equalizing and overdriving the low * swing wires, the net time constant is less * than the actual value */ - delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0); + delay += + horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, .25, 0); #define VOL_SWING .1 - temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */ - temp_power *= 2; /* differential wire */ + temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */ + temp_power *= 2; /* differential wire */ l_wire.delay = delay - transmitter.delay; l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; - l_wire.power.readOp.leakage = deviceType->Vdd* - (4* cmos_Isub_leakage(nsize, 0, 1, nmos)); + l_wire.power.readOp.leakage = + deviceType->Vdd * (4 * cmos_Isub_leakage(nsize, 0, 1, nmos)); - l_wire.power.readOp.gate_leakage = deviceType->Vdd* - (4* cmos_Ig_leakage(nsize, 0, 1, nmos)); + l_wire.power.readOp.gate_leakage = + deviceType->Vdd * (4 * cmos_Ig_leakage(nsize, 0, 1, nmos)); - //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + // double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; delay += g_tp.sense_delay; sense_amp.delay = g_tp.sense_delay; - out_rise_time = g_tp.sense_delay/(deviceType->Vth); + out_rise_time = 
g_tp.sense_delay / (deviceType->Vth); sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; - sense_amp.power.readOp.leakage = 0; //FIXME + sense_amp.power.readOp.leakage = 0; // FIXME sense_amp.power.readOp.gate_leakage = 0; power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; @@ -546,175 +540,166 @@ Wire::low_swing_model() l_wire.power.readOp.leakage + sense_amp.power.readOp.leakage; power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + - l_wire.power.readOp.gate_leakage + - sense_amp.power.readOp.gate_leakage; + l_wire.power.readOp.gate_leakage + + sense_amp.power.readOp.gate_leakage; } - double -Wire::sense_amp_input_cap() -{ +double Wire::sense_amp_input_cap() { return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + - drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); + gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + + drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); } - -void Wire::delay_optimal_wire (/*bool reset_repeater_sizing*/) -{ - double len = wire_length; - //double min_wire_width = wire_width; //m +void Wire::delay_optimal_wire(/*bool reset_repeater_sizing*/) { + double len = wire_length; + // double min_wire_width = wire_width; //m double beta = pmos_to_nmos_sz_ratio(); - double switching = 0; // switching energy - double short_ckt = 0; // short-circuit energy - double tc = 0; // time constant + double switching = 0; // switching energy + double short_ckt = 0; // short-circuit energy + double tc = 0; // time constant // input cap of min sized driver double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); - // output parasitic capacitance of - // the min. sized driver + // output parasitic capacitance of + // the min. 
sized driver double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm + double out_res = + (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); // ohm // wire cap /m double wc = wire_cap(len); // size the repeater such that the delay of the wire is minimum - double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel + double repeater_scaling = + sqrt(out_res * wc / (wr * input_cap)); // len will cancel - // calc the optimum spacing between the repeaters (m) + // calc the optimum spacing between the repeaters (m) -// if (reset_repeater_sizing==true) { + // if (reset_repeater_sizing==true) { - repeater_spacing_init = sqrt(2 * out_res * (out_cap + input_cap)/ - ((wr/len)*(wc/len))); - repeater_size_init = repeater_scaling; -// } + repeater_spacing_init = + sqrt(2 * out_res * (out_cap + input_cap) / ((wr / len) * (wc / len))); + repeater_size_init = repeater_scaling; + // } switching = (repeater_scaling * (input_cap + out_cap) + - repeater_spacing_init * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + repeater_spacing_init * (wc / len)) * + deviceType->Vdd * deviceType->Vdd; tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing_init /repeater_scaling + - wr/len * repeater_spacing_init * input_cap * repeater_scaling + - 0.5 * (wr/len) * (wc/len)* repeater_spacing_init * repeater_spacing_init ; + out_res * wc / len * repeater_spacing_init / repeater_scaling + + wr / len * repeater_spacing_init * input_cap * repeater_scaling + + 0.5 * (wr / len) * (wc / len) * repeater_spacing_init * + repeater_spacing_init; - delay = 0.693 * tc * len/repeater_spacing_init ; + delay = 0.693 * tc * len / repeater_spacing_init; #define 
Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_scaling * tc; - - area.set_area((len/repeater_spacing_init ) * - compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, - g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)); - power.readOp.dynamic = ((len/repeater_spacing_init )*(switching + short_ckt)); - power.readOp.leakage = ((len/repeater_spacing_init )* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); - power.readOp.gate_leakage = ((len/repeater_spacing_init )* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); + repeater_scaling * tc; + + area.set_area((len / repeater_spacing_init) * + compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, + g_tp.min_w_nmos_ * repeater_scaling, + g_tp.cell_h_def)); + power.readOp.dynamic = + ((len / repeater_spacing_init) * (switching + short_ckt)); + power.readOp.leakage = + ((len / repeater_spacing_init) * deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * repeater_scaling, + beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)); + power.readOp.gate_leakage = + ((len / repeater_spacing_init) * deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * repeater_scaling, + beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)); } - - -// calculate power/delay values for wires with suboptimal repeater sizing/spacing -void -Wire::init_wire(/*bool reset_repeater_sizing*/){ +// calculate power/delay values for wires with suboptimal repeater +// sizing/spacing +void Wire::init_wire(/*bool reset_repeater_sizing*/) { wire_length = 1; delay_optimal_wire(/*reset_repeater_sizing*/); - double sp, si; + double sp, si; powerDef pow; - si = repeater_size_init ; - sp = repeater_spacing_init ; + si = repeater_size_init; + sp = repeater_spacing_init; sp *= 1e6; // in microns double i, j, del; 
repeated_wire.push_back(Component()); - for (j=sp; j < 4*sp; j+=100) { + for (j = sp; j < 4 * sp; j += 100) { for (i = si; i > 1; i--) { - pow = wire_model(j*1e-6, i, &del); + pow = wire_model(j * 1e-6, i, &del); if (j == sp && i == si) { global.delay = del; global.power = pow; global.area.h = si; - global.area.w = sp*1e-6; // m + global.area.w = sp * 1e-6; // m } -// cout << "Repeater size - "<< i << -// " Repeater spacing - " << j << -// " Delay - " << del << -// " PowerD - " << pow.readOp.dynamic << -// " PowerL - " << pow.readOp.leakage <delay; low_swing.power = l_wire->power; delete l_wire; } - - -void Wire::update_fullswing() -{ +void Wire::update_fullswing() { list::iterator citer; double del[4]; - del[3] = this->global.delay + this->global.delay*.3; - del[2] = global.delay + global.delay*.2; - del[1] = global.delay + global.delay*.1; - del[0] = global.delay + global.delay*.05; + del[3] = this->global.delay + this->global.delay * .3; + del[2] = global.delay + global.delay * .2; + del[1] = global.delay + global.delay * .1; + del[0] = global.delay + global.delay * .05; double threshold; double ncost; double cost; int i = 4; - while (i>0) { - threshold = del[i-1]; + while (i > 0) { + threshold = del[i - 1]; cost = BIGNUM; - for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++) - { + for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++) { if (citer->delay > threshold) { citer = repeated_wire.erase(citer); - citer --; - } - else { - ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic + - citer->power.readOp.leakage/global.power.readOp.leakage; - if(ncost < cost) - { + citer--; + } else { + ncost = citer->power.readOp.dynamic / global.power.readOp.dynamic + + citer->power.readOp.leakage / global.power.readOp.leakage; + if (ncost < cost) { cost = ncost; if (i == 4) { global_30.delay = citer->delay; global_30.power = citer->power; - global_30.area = citer->area; - } - else if (i==3) { + global_30.area = 
citer->area; + } else if (i == 3) { global_20.delay = citer->delay; global_20.power = citer->power; - global_20.area = citer->area; - } - else if(i==2) { + global_20.area = citer->area; + } else if (i == 2) { global_10.delay = citer->delay; global_10.power = citer->power; - global_10.area = citer->area; - } - else if(i==1) { + global_10.area = citer->area; + } else if (i == 1) { global_5.delay = citer->delay; global_5.power = citer->power; - global_5.area = citer->area; + global_5.area = citer->area; } } } @@ -722,17 +707,16 @@ void Wire::update_fullswing() i--; } citer = repeated_wire.begin(); - while (!repeated_wire.empty()) //TODO: code optimize - {citer=repeated_wire.erase(citer);} + while (!repeated_wire.empty()) // TODO: code optimize + { + citer = repeated_wire.erase(citer); + } } - - -powerDef Wire::wire_model (double space, double size, double *delay) -{ +powerDef Wire::wire_model(double space, double size, double *delay) { powerDef ptemp; double len = 1; - //double min_wire_width = wire_width; //m + // double min_wire_width = wire_width; //m double beta = pmos_to_nmos_sz_ratio(); // switching energy double switching = 0; @@ -741,17 +725,16 @@ powerDef Wire::wire_model (double space, double size, double *delay) // time constant double tc = 0; // input cap of min sized driver - double input_cap = gate_C (g_tp.min_w_nmos_ + - min_w_pmos, 0); + double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); - // output parasitic capacitance of - // the min. sized driver + // output parasitic capacitance of + // the min. 
sized driver double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm + double out_res = + (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); // ohm // wire cap /m double wc = wire_cap(len); @@ -759,125 +742,171 @@ powerDef Wire::wire_model (double space, double size, double *delay) repeater_spacing = space; repeater_size = size; - switching = (repeater_size * (input_cap + out_cap) + - repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + switching = + (repeater_size * (input_cap + out_cap) + repeater_spacing * (wc / len)) * + deviceType->Vdd * deviceType->Vdd; tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing/repeater_size + - wr/len * repeater_spacing * out_cap * repeater_size + - 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + out_res * wc / len * repeater_spacing / repeater_size + + wr / len * repeater_spacing * out_cap * repeater_size + + 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing; - *delay = 0.693 * tc * len/repeater_spacing; + *delay = 0.693 * tc * len / repeater_spacing; #define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. 
{IEEE TED} */ short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_size * tc; + repeater_size * tc; - ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); - ptemp.readOp.leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt)); + ptemp.readOp.leakage = + ((len / repeater_spacing) * deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * repeater_size, + beta * g_tp.min_w_nmos_ * repeater_size, 1, inv)); - ptemp.readOp.gate_leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + ptemp.readOp.gate_leakage = + ((len / repeater_spacing) * deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * repeater_size, + beta * g_tp.min_w_nmos_ * repeater_size, 1, inv)); return ptemp; } -void -Wire::print_wire() -{ +void Wire::print_wire() { cout << "\nWire Properties at DVS level 0:\n\n"; - cout << " Delay Optimal\n\tRepeater size - "<< global.area.h << - " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)" - " \n\tDelay - " << global.delay*1e6 << " (ns/mm)" - " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)" - " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)" - " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n"; - cout << "\tWire width - " <delay; - low_swing.power = l_wire->power; - delete l_wire; +void Wire::wire_dvs_update() { + double i, j, del; + powerDef pow; + pow = wire_model(global.area.w, global.area.h, &del); + global.delay = del; + global.power = pow; + pow = wire_model(global_5.area.w, global_5.area.h, &del); + global_5.delay = del; + global_5.power = pow; + pow = wire_model(global_10.area.w, global_10.area.h, &del); + global_10.delay = del; + global_10.power = pow; + pow = 
wire_model(global_20.area.w, global_20.area.h, &del); + global_20.delay = del; + global_20.power = pow; + pow = wire_model(global_30.area.w, global_30.area.h, &del); + global_30.delay = del; + global_30.power = pow; + + Wire *l_wire = new Wire(Low_swing, 0.001 /* 1 mm*/, 1); + low_swing.delay = l_wire->delay; + low_swing.power = l_wire->power; + delete l_wire; } diff --git a/cacti/wire.h b/cacti/wire.h index ce2ddf3..4f6df7e 100644 --- a/cacti/wire.h +++ b/cacti/wire.h @@ -29,99 +29,91 @@ * ***************************************************************************/ - - #ifndef __WIRE_H__ #define __WIRE_H__ +#include "assert.h" #include "basic_circuit.h" +#include "cacti_interface.h" #include "component.h" #include "parameter.h" -#include "assert.h" -#include "cacti_interface.h" + #include #include -class Wire : public Component -{ - public: - Wire(enum Wire_type wire_model, double len /* in u*/, - int nsense = 1/* no. of sense amps connected to the low-swing wire */, - double width_scaling = 1, - double spacing_scaling = 1, - enum Wire_placement wire_placement = outside_mat, - double resistivity = CU_RESISTIVITY, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); - ~Wire(); - - Wire( double width_scaling = 1, - double spacing_scaling = 1, -// bool reset_repeater_sizing = true, - enum Wire_placement wire_placement = outside_mat, - double resistivity = CU_RESISTIVITY, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ); // should be used only once for initializing static members - void init_wire(/*bool reset_repeater_sizing = true*/); - - void calculate_wire_stats(); - void delay_optimal_wire(/*bool reset_repeater_sizing = true*/); - double wire_cap(double len, bool call_from_outside=false); - double wire_res(double len); - void low_swing_model(); - double signal_fall_time(); - double signal_rise_time(); - double sense_amp_input_cap(); - - enum Wire_type wt; - double wire_spacing; - double wire_width; - enum Wire_placement wire_placement; - 
double repeater_size; - double repeater_spacing; - static double repeater_size_init; // value used in initialization should not be reused in final output - static double repeater_spacing_init; - double wire_length; - double in_rise_time, out_rise_time; - - void set_in_rise_time(double rt) - { - in_rise_time = rt; - } - static Component global; - static Component global_5; - static Component global_10; - static Component global_20; - static Component global_30; - static Component low_swing; - static double wire_width_init; - static double wire_spacing_init; - static void print_wire(); - void wire_dvs_update(); - - private: - - int nsense; // no. of sense amps connected to a low-swing wire if it - // is broadcasting data to multiple destinations - // width and spacing scaling factor can be used - // to model low level wires or special - // fat wires - double w_scale, s_scale; - double resistivity; - powerDef wire_model (double space, double size, double *delay); - list repeated_wire; - void update_fullswing(); - static int initialized; - - - //low-swing - Component transmitter; - Component l_wire; - Component sense_amp; - - double min_w_pmos; - - TechnologyParameter::DeviceType *deviceType; - +class Wire : public Component { +public: + Wire(enum Wire_type wire_model, double len /* in u*/, + int nsense = 1 /* no. 
of sense amps connected to the low-swing wire */, + double width_scaling = 1, double spacing_scaling = 1, + enum Wire_placement wire_placement = outside_mat, + double resistivity = CU_RESISTIVITY, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); + ~Wire(); + + Wire(double width_scaling = 1, double spacing_scaling = 1, + // bool reset_repeater_sizing = true, + enum Wire_placement wire_placement = outside_mat, + double resistivity = CU_RESISTIVITY, + TechnologyParameter::DeviceType *dt = + &(g_tp.peri_global)); // should be used only once for initializing + // static members + void init_wire(/*bool reset_repeater_sizing = true*/); + + void calculate_wire_stats(); + void delay_optimal_wire(/*bool reset_repeater_sizing = true*/); + double wire_cap(double len, bool call_from_outside = false); + double wire_res(double len); + void low_swing_model(); + double signal_fall_time(); + double signal_rise_time(); + double sense_amp_input_cap(); + + enum Wire_type wt; + double wire_spacing; + double wire_width; + enum Wire_placement wire_placement; + double repeater_size; + double repeater_spacing; + static double repeater_size_init; // value used in initialization should not + // be reused in final output + static double repeater_spacing_init; + double wire_length; + double in_rise_time, out_rise_time; + + void set_in_rise_time(double rt) { in_rise_time = rt; } + static Component global; + static Component global_5; + static Component global_10; + static Component global_20; + static Component global_30; + static Component low_swing; + static double wire_width_init; + static double wire_spacing_init; + static void print_wire(); + void wire_dvs_update(); + +private: + int nsense; // no. 
of sense amps connected to a low-swing wire if it + // is broadcasting data to multiple destinations + // width and spacing scaling factor can be used + // to model low level wires or special + // fat wires + double w_scale, s_scale; + double resistivity; + powerDef wire_model(double space, double size, double *delay); + list repeated_wire; + void update_fullswing(); + static int initialized; + + // low-swing + Component transmitter; + Component l_wire; + Component sense_amp; + + double min_w_pmos; + + TechnologyParameter::DeviceType *deviceType; }; #endif diff --git a/core.cc b/core.cc index 562fd0e..eda0210 100644 --- a/core.cc +++ b/core.cc @@ -29,1804 +29,2176 @@ * ***************************************************************************/ +#include "core.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" #include "io.h" #include "parameter.h" -#include "const.h" -#include "basic_circuit.h" -#include + #include -#include "XML_Parse.h" -#include -#include #include -#include "core.h" +#include +#include +#include //#include "globalvar.h" -InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IB (0), - BTB (0), - ID_inst (0), - ID_operand (0), - ID_misc (0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false, is_default = true; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; - //Assuming all L1 caches are virtually idxed physically tagged. 
- //cache - - size = (int)XML->sys.core[ithCore].icache.icache_config[0]; - line = (int)XML->sys.core[ithCore].icache.icache_config[1]; - assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; - banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // interface_ip.obj_func_dyn_energy = 0; - // interface_ip.obj_func_dyn_power = 0; - // interface_ip.obj_func_leak_power = 0; - // interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty); - scktRatio = g_tp.sckt_co_eff; - chip_PR_overhead = g_tp.chip_layout_overhead; - macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area); - area.set_area(area.get_area()+ 
icache.caches->local_result.area); - //output_data_csv(icache.caches.local_result); - - - /* - *iCache controllers - *miss buffer Each MSHR contains enough state - *to handle one or more accesses of any type to a single memory line. - *Due to the generality of the MSHR mechanism, - *the amount of state involved is non-trivial: - *including the address, pointers to the cache entry and destination register, - *written data, and various other pieces of state. - */ - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area); - 
area.set_area(area.get_area()+ icache.missb->local_result.area); - //output_data_csv(icache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area); - area.set_area(area.get_area()+ icache.ifb->local_result.area); - //output_data_csv(icache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area); - area.set_area(area.get_area()+ icache.prefetchb->local_result.area); - //output_data_csv(icache.prefetchb.local_result); - - //Instruction buffer - data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - interface_ip.pure_cam = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64? 
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions. - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - IB->area.set_area(IB->area.get_area()+ IB->local_result.area); - area.set_area(area.get_area()+ IB->local_result.area); - //output_data_csv(IB.IB.local_result); - - // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; - // inst_decoder.init_decoder(is_default, &interface_ip); - // inst_decoder.full_decoder_power(); - - if (coredynp.predictionW>0) - { - /* - * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged - * It is only a cache without all the buffers in the cache controller since it is more like a - * look up table than a cache with cache controller. 
When access miss, no load from other places - * such as main memory (not actively fill the misses), it is passively updated under two circumstances: - * 1) when BPT@ID stage finds out current is a taken branch while BTB missed - * 2) When BPT@ID stage predicts differently than BTB - * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) - * 4) when EXEU find out wrong target has been provided from BTB. - * - */ - size = XML->sys.core[ithCore].BTB.BTB_config[0]; - line = XML->sys.core[ithCore].BTB.BTB_config[1]; - assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; - banks = XML->sys.core[ithCore].BTB.BTB_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); -// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:size; - interface_ip.line_sz = debug?64:line; - interface_ip.assoc = debug?8:assoc; - interface_ip.nbanks = debug?1:banks; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - BTB = 
new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area); - area.set_area(area.get_area()+ BTB->local_result.area); - ///cout<<"area="<area.get_area()); - } +InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), IB(0), BTB(0), ID_inst(0), ID_operand(0), ID_misc(0), + exist(exist_) { + if (!exist) + return; + int idx, tag, data, size, line, assoc, banks; + bool debug = false, is_default = true; - ID_inst = new inst_decoder(is_default, &interface_ip, - coredynp.opcode_length, 1/*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_operand = new inst_decoder(is_default, &interface_ip, - coredynp.arch_ireg_width, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, &interface_ip, - 8/* Prefix field etc upto 14B*/, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer. - //So the dynamic power should be multiplied by a few times. - area.set_area(area.get_area()+ (ID_inst->area.get_area() - +ID_operand->area.get_area() - +ID_misc->area.get_area())*coredynp.decodeW); + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; + // Assuming all L1 caches are virtually idxed physically tagged. + // cache + + size = (int)XML->sys.core[ithCore].icache.icache_config[0]; + line = (int)XML->sys.core[ithCore].icache.icache_config[1]; + assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; + banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; + idx = debug ? 
9 : int(ceil(log2(size / line / assoc))); + tag = debug ? 51 + : (int)XML->sys.physical_address_width - idx - + int(ceil(log2(line))) + EXTRA_TAG_BITS; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + debug ? 32768 : (int)XML->sys.core[ithCore].icache.icache_config[0]; + interface_ip.line_sz = + debug ? 64 : (int)XML->sys.core[ithCore].icache.icache_config[1]; + interface_ip.assoc = + debug ? 8 : (int)XML->sys.core[ithCore].icache.icache_config[2]; + interface_ip.nbanks = + debug ? 1 : (int)XML->sys.core[ithCore].icache.icache_config[3]; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].icache.icache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 3.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + // interface_ip.obj_func_dyn_energy = 0; + // interface_ip.obj_func_dyn_power = 0; + // interface_ip.obj_func_leak_power = 0; + // interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + icache.caches = new ArrayST(&interface_ip, "icache", Core_device, + coredynp.opt_local, coredynp.core_ty); + scktRatio = g_tp.sckt_co_eff; + chip_PR_overhead = g_tp.chip_layout_overhead; + macro_PR_overhead = g_tp.macro_layout_overhead; + icache.area.set_area(icache.area.get_area() + + icache.caches->local_result.area); + area.set_area(area.get_area() + icache.caches->local_result.area); + // output_data_csv(icache.caches.local_result); + + /* + *iCache controllers + *miss buffer Each MSHR contains enough state + *to handle one or more accesses of any type to a single memory line. + *Due to the generality of the MSHR mechanism, + *the amount of state involved is non-trivial: + *including the address, pointers to the cache entry and destination register, + *written data, and various other pieces of state. + */ + interface_ip.num_search_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + + icache.caches->l_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].icache.buffer_sizes[0] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / + clockRate; // means cycle time + interface_ip.latency = debug + ? 
1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / + clockRate; // means access time + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + + icache.missb->local_result.area); + area.set_area(area.get_area() + icache.missb->local_result.area); + // output_data_csv(icache.missb.local_result); + + // fill buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = icache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = data * XML->sys.core[ithCore].icache.buffer_sizes[1]; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); + area.set_area(area.get_area() + icache.ifb->local_result.area); + // output_data_csv(icache.ifb.local_result); + + // prefetch buffer + tag = XML->sys.physical_address_width + + EXTRA_TAG_BITS; // check with previous entries to decide wthether to + // merge. + data = icache.caches->l_ip + .line_sz; // separate queue to prevent from cache polution. + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = + XML->sys.core[ithCore].icache.buffer_sizes[2] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.prefetchb = + new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + + icache.prefetchb->local_result.area); + area.set_area(area.get_area() + icache.prefetchb->local_result.area); + // output_data_csv(icache.prefetchb.local_result); + + // Instruction buffer + data = + XML->sys.core[ithCore].instruction_length * + XML->sys.core[ithCore] + .peak_issue_width; // icache.caches.l_ip.line_sz; //multiple + // threads timing sharing the instruction buffer. + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.pure_cam = false; + interface_ip.line_sz = int(ceil(data / 8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].number_hardware_threads * + XML->sys.core[ithCore].instruction_buffer_size * + interface_ip.line_sz > + 64 + ? XML->sys.core[ithCore].number_hardware_threads * + XML->sys.core[ithCore].instruction_buffer_size * + interface_ip.line_sz + : 64; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + // NOTE: Assuming IB is time slice shared among threads, every fetch op will + // at least fetch "fetch width" instructions. + interface_ip.num_rw_ports = + debug + ? 
1 + : XML->sys.core[ithCore] + .number_instruction_fetch_ports; // XML->sys.core[ithCore].fetch_width; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, + coredynp.core_ty); + IB->area.set_area(IB->area.get_area() + IB->local_result.area); + area.set_area(area.get_area() + IB->local_result.area); + // output_data_csv(IB.IB.local_result); + + // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; + // inst_decoder.init_decoder(is_default, &interface_ip); + // inst_decoder.full_decoder_power(); + + if (coredynp.predictionW > 0) { + /* + * BTB branch target buffer, accessed during IF stage. Virtually indexed and + * virtually tagged It is only a cache without all the buffers in the cache + * controller since it is more like a look up table than a cache with cache + * controller. When access miss, no load from other places such as main + * memory (not actively fill the misses), it is passively updated under two + * circumstances: 1) when BPT@ID stage finds out current is a taken branch + * while BTB missed 2) When BPT@ID stage predicts differently than BTB 3) + * When ID stage finds out current instruction is not a branch while BTB had + * a hit.(mark as invalid) 4) when EXEU find out wrong target has been + * provided from BTB. + * + */ + size = XML->sys.core[ithCore].BTB.BTB_config[0]; + line = XML->sys.core[ithCore].BTB.BTB_config[1]; + assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; + banks = XML->sys.core[ithCore].BTB.BTB_config[3]; + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + // tag = + // debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + + // int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + // +EXTRA_TAG_BITS; + tag = debug ? 
51 + : XML->sys.virtual_address_width + + int(ceil(log2( + XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = debug ? 32768 : size; + interface_ip.line_sz = debug ? 64 : line; + interface_ip.assoc = debug ? 8 : assoc; + interface_ip.nbanks = debug ? 1 : banks; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].BTB.BTB_config[4] / clockRate; + interface_ip.latency = + debug ? 3.0 / clockRate + : XML->sys.core[ithCore].BTB.BTB_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); + area.set_area(area.get_area() + BTB->local_result.area); + /// cout<<"area="<area.get_area()); + } + ID_inst = new inst_decoder(is_default, &interface_ip, coredynp.opcode_length, + 1 /*Decoder should not know how many by itself*/, + coredynp.x86, Core_device, coredynp.core_ty); + + ID_operand = + new inst_decoder(is_default, &interface_ip, coredynp.arch_ireg_width, 1, + coredynp.x86, Core_device, coredynp.core_ty); + + ID_misc = new inst_decoder(is_default, &interface_ip, + 8 /* Prefix field etc upto 14B*/, 1, coredynp.x86, + Core_device, coredynp.core_ty); + // TODO: X86 decoder should decode the inst in cyclic mode under the control + // of 
squencer. So the dynamic power should be multiplied by a few times. + area.set_area(area.get_area() + + (ID_inst->area.get_area() + ID_operand->area.get_area() + + ID_misc->area.get_area()) * + coredynp.decodeW); } +BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), globalBPT(0), localBPT(0), L1_localBPT(0), + L2_localBPT(0), chooser(0), RAS(0), exist(exist_) { + /* + * Branch Predictor, accessed during ID stage. + * McPAT's branch predictor model is the tournament branch predictor used in + * Alpha 21264, including global predictor, local two level predictor, and + * Chooser. The Branch predictor also includes a RAS (return address stack) + * for function calls Branch predictors are tagged by thread ID and modeled as + * 1-way associative cache. However RAS return address stacks are duplicated + * for each thread. + * TODO:Data Width need to be computed more precisely * + */ + if (!exist) + return; + int tag, data; -BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - globalBPT(0), - localBPT(0), - L1_localBPT(0), - L2_localBPT(0), - chooser(0), - RAS(0), - exist(exist_) -{ - /* - * Branch Predictor, accessed during ID stage. - * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264, - * including global predictor, local two level predictor, and Chooser. - * The Branch predictor also includes a RAS (return address stack) for function calls - * Branch predictors are tagged by thread ID and modeled as 1-way associative cache. - * However RAS return address stacks are duplicated for each thread. 
- * TODO:Data Width need to be computed more precisely * - */ - if (!exist) return; - int tag, data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.assoc = 1; - interface_ip.pure_cam = false; - if (coredynp.multithreaded) - { - - tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - } - else - { - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - - } - //Global predictor - data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area); - area.set_area(area.get_area()+ globalBPT->local_result.area); - - //Local BPT (Level 1) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - 
interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area); - area.set_area(area.get_area()+ L1_localBPT->local_result.area); - - //Local BPT (Level 2) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area); - area.set_area(area.get_area()+ L2_localBPT->local_result.area); - - //Chooser - data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - 
interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area); - area.set_area(area.get_area()+ chooser->local_result.area); - - //RAS return address stacks are Duplicated for each thread. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - data = int(ceil(coredynp.pc_width/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); - area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); - + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + interface_ip.assoc = 1; + interface_ip.pure_cam = false; + if (coredynp.multithreaded) { + + tag = int(log2(coredynp.num_hthreads) + EXTRA_TAG_BITS); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + + 
interface_ip.is_cache = true; + interface_ip.pure_ram = false; + } else { + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + } + // Global predictor + data = + int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.global_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, + coredynp.opt_local, coredynp.core_ty); + globalBPT->area.set_area(globalBPT->area.get_area() + + globalBPT->local_result.area); + area.set_area(area.get_area() + globalBPT->local_result.area); + + // Local BPT (Level 1) + data = + int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0] / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.local_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", 
Core_device, + coredynp.opt_local, coredynp.core_ty); + L1_localBPT->area.set_area(L1_localBPT->area.get_area() + + L1_localBPT->local_result.area); + area.set_area(area.get_area() + L1_localBPT->local_result.area); + + // Local BPT (Level 2) + data = + int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1] / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.local_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, + coredynp.opt_local, coredynp.core_ty); + L2_localBPT->area.set_area(L2_localBPT->area.get_area() + + L2_localBPT->local_result.area); + area.set_area(area.get_area() + L2_localBPT->local_result.area); + + // Chooser + data = + int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.chooser_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + 
interface_ip.num_se_rd_ports = 0; + chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, + coredynp.opt_local, coredynp.core_ty); + chooser->area.set_area(chooser->area.get_area() + chooser->local_result.area); + area.set_area(area.get_area() + chooser->local_result.area); + + // RAS return address stacks are Duplicated for each thread. + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + data = int(ceil(coredynp.pc_width / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].RAS_size; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, + coredynp.core_ty); + RAS->area.set_area(RAS->area.get_area() + + RAS->local_result.area * coredynp.num_hthreads); + area.set_area(area.get_area() + + RAS->local_result.area * coredynp.num_hthreads); } -SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - int_inst_window(0), - fp_inst_window(0), - ROB(0), - instruction_selection(0), - exist(exist_) - { - if (!exist) return; - int tag, data; - bool is_default=true; - string tmp_name; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if ((coredynp.core_ty==Inorder && coredynp.multithreaded)) - { - //Instruction issue queue, in-order multi-issue 
or multithreaded processor also has this structure. Unified window for Inorder processors - tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design - data = XML->sys.core[ithCore].instruction_length; - //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures - //Software Developer’s Manual - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - //output_data_csv(iRS.RS.local_result); - Iw_height =int_inst_window->local_result.cache_ht; - - /* - * selection logic - * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up - * 
instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who - * at the issue stage. - */ - interface_ip.assoc = 1; //reset to prevent unnecessary warning messages when init_interface - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, Core_device, coredynp.core_ty); - } - - if (coredynp.core_ty==OOO) - { - /* - * CAM based instruction window - * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored - * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored - * It is written once and read twice(two operands) before an instruction can be issued. - * X86 instruction can be very long up to 15B. add instruction length in XML - */ - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. - //This is modeled using two equivalent readouts with half of the data width - tmp_name = "InstIssueQueue"; - } - else - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+ - 2*coredynp.int_data_width)/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. 
- //This is modeled using two equivalent readouts with half of the data width - - tmp_name = "IntReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 2*1.0/clockRate; - interface_ip.latency = 2*1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - Iw_height =int_inst_window->local_result.cache_ht; - //FU inst window - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0)); - tmp_name = "FPIssueQueue"; - } - else - { - tag = 2*coredynp.phy_ireg_width; - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+ - 2*coredynp.fp_data_width)/8.0)); - tmp_name = "FPReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - 
interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_issueW; - interface_ip.num_wr_ports = coredynp.fp_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - fp_Iw_height =fp_inst_window->local_result.cache_ht; - - if (XML->sys.core[ithCore].ROB_size >0) - { - /* - * if ROB_size = 0, then the target processor does not support hardware-based - * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which - * means branch must be resolved before instruction issued into instruction window, since - * there is no change to flush miss-predict branch path after instructions are issued in this situation. - * - * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. - * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. - * However, this approach is abandoned due to its high power and poor scalability. - * McPAT uses current implementation of ROB as circular buffer. - * ROB is written once when instruction is issued and read once when the instruction is committed. 
* - */ - - int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); - data = int(ceil((robExtra+coredynp.pc_width + ((coredynp.rm_ty ==RAMbased)? (coredynp.phy_ireg_width + coredynp.phy_freg_width) : fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width)) + ((coredynp.scheu_ty==PhysicalRegFile)? 0 : coredynp.fp_data_width ))/8.0)); - /* - * 5 bits are: busy, Issued, Finished, speculative, valid; - * PC is to id the instruction for recover exception/mis-prediction. - * When using RAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT, - * so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated; - * otherwise, the RAM-based RAT needs to support search ops to identify the target architecture register that needs to be updated, or the physical resigner that needs to be recycled; - * When using CAM-based RAT, ROB only needs to contain destination physical register since the CAM-base RAT can search for the corresponding ARF-PRF mapping - * to find the correct entry in the RAT, so that the correct architecture register (and freelist/bits) can be found and the RAT can be appropriately updated. - * ROB phy_reg entry should use the larger one from phy_ireg and phy_freg; fdata_width is always larger. - * Latest Intel Processors may have different ROB/RS designs. - */ - - - -/* - if(coredynp.scheu_ty==PhysicalRegFile) - { - //PC is to id the instruction for recover exception. - //inst is used to map the renamed dest. 
registers.so that commit stage can know which reg/RRAT to update -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0)); - - if (coredynp.rm_ty ==RAMbased) - { - data = int(ceil((robExtra + coredynp.pc_width + (coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0)); - //When using RAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT, - //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated. - } - else if ((coredynp.rm_ty ==CAMbased)) - { - data = int(ceil((robExtra+coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0)); - //When using CAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT, - //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated. - } - } - else - { - //in RS based OOO, ROB also contains value of destination reg -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - - //using phy_reg number to search in the RAT, the correct architecture register can be found and the RAT can be appropriately updated. - //ROB phy_reg entry should use the larger one from ireg and freg; fdata_width is always larger; Latest Intel Processors may have different ROB/RS designs. 
- data = int(ceil((robExtra + coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width) + coredynp.fp_data_width)/8.0)); - } -*/ - - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_commitW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - ROB_height =ROB->local_result.cache_ht; - } - - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); +SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), int_inst_window(0), fp_inst_window(0), ROB(0), + instruction_selection(0), exist(exist_) { + if (!exist) + return; + int tag, data; + bool is_default = true; + string tmp_name; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + if ((coredynp.core_ty == Inorder && 
coredynp.multithreaded)) { + // Instruction issue queue, in-order multi-issue or multithreaded processor + // also has this structure. Unified window for Inorder processors + tag = int(log2(XML->sys.core[ithCore].number_hardware_threads) * + coredynp.perThreadState); // This is the normal thread state bits + // based on Niagara Design + data = XML->sys.core[ithCore].instruction_length; + // NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and + // IA-32 Architectures Software Developer’s Manual + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = int(ceil(data / 8.0)); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + XML->sys.core[ithCore].instruction_window_size * interface_ip.line_sz > + 64 + ? XML->sys.core[ithCore].instruction_window_size * + interface_ip.line_sz + : 64; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.peak_issueW; + interface_ip.num_wr_ports = coredynp.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = coredynp.peak_issueW; + int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, + coredynp.opt_local, coredynp.core_ty); + int_inst_window->area.set_area(int_inst_window->area.get_area() + + int_inst_window->local_result.area * + coredynp.num_pipelines); + area.set_area(area.get_area() + + int_inst_window->local_result.area * coredynp.num_pipelines); + // output_data_csv(iRS.RS.local_result); + Iw_height = int_inst_window->local_result.cache_ht; + + /* + * selection logic + * In 
a single-issue Inorder multithreaded processor like Niagara, issue + * width=1*number_of_threads since the processor does need to pick up + * instructions from multiple ready ones(although these ready ones are from + * different threads).While SMT processors do not distinguish which thread + * belongs to who at the issue stage. + */ + interface_ip.assoc = + 1; // reset to prevent unnecessary warning messages when init_interface + instruction_selection = new selection_logic( + is_default, XML->sys.core[ithCore].instruction_window_size, + coredynp.peak_issueW * XML->sys.core[ithCore].number_hardware_threads, + &interface_ip, Core_device, coredynp.core_ty); + } + + if (coredynp.core_ty == OOO) { + /* + * CAM based instruction window + * For physicalRegFilebased OOO it is the instruction issue queue, where + * only tags of phy regs are stored For RS based OOO it is the Reservation + * station, where both tags and values of phy regs are stored It is written + * once and read twice(two operands) before an instruction can be issued. + * X86 instruction can be very long up to 15B. add instruction length in XML + */ + if (coredynp.scheu_ty == PhysicalRegFile) { + tag = coredynp.phy_ireg_width; + // Each time only half of the tag is compared, but two tag should be + // stored. This underestimate the search power + data = + int((ceil((coredynp.instruction_length + + 2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width)) / + 2.0) / + 8.0)); + // Data width being divided by 2 means only after both operands available + // the whole data will be read out. This is modeled using two equivalent + // readouts with half of the data width + tmp_name = "InstIssueQueue"; + } else { + tag = coredynp.phy_ireg_width; + // Each time only half of the tag is compared, but two tag should be + // stored. 
This underestimate the search power + data = + int(ceil(((coredynp.instruction_length + + 2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width) + + 2 * coredynp.int_data_width) / + 2.0) / + 8.0)); + // Data width being divided by 2 means only after both operands available + // the whole data will be read out. This is modeled using two equivalent + // readouts with half of the data width + + tmp_name = "IntReservationStation"; + } + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].instruction_window_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 0; + interface_ip.throughput = 2 * 1.0 / clockRate; + interface_ip.latency = 2 * 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.peak_issueW; + interface_ip.num_wr_ports = coredynp.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = coredynp.peak_issueW; + int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, + coredynp.opt_local, coredynp.core_ty); + int_inst_window->area.set_area(int_inst_window->area.get_area() + + int_inst_window->local_result.area * + coredynp.num_pipelines); + area.set_area(area.get_area() + + int_inst_window->local_result.area * coredynp.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; + // FU inst window + if (coredynp.scheu_ty == PhysicalRegFile) { + tag = 2 * coredynp.phy_freg_width; // TODO: each time only half of the tag + // is compared + data = + int(ceil((coredynp.instruction_length + + 2 * (coredynp.phy_freg_width - coredynp.arch_freg_width)) / + 
8.0)); + tmp_name = "FPIssueQueue"; + } else { + tag = 2 * coredynp.phy_ireg_width; + data = + int(ceil((coredynp.instruction_length + + 2 * (coredynp.phy_freg_width - coredynp.arch_freg_width) + + 2 * coredynp.fp_data_width) / + 8.0)); + tmp_name = "FPReservationStation"; } + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].fp_instruction_window_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.fp_issueW; + interface_ip.num_wr_ports = coredynp.fp_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = coredynp.fp_issueW; + fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, + coredynp.opt_local, coredynp.core_ty); + fp_inst_window->area.set_area(fp_inst_window->area.get_area() + + fp_inst_window->local_result.area * + coredynp.num_fp_pipelines); + area.set_area(area.get_area() + fp_inst_window->local_result.area * + coredynp.num_fp_pipelines); + fp_Iw_height = fp_inst_window->local_result.cache_ht; + + if (XML->sys.core[ithCore].ROB_size > 0) { + /* + * if ROB_size = 0, then the target processor does not support + *hardware-based speculation, i.e. , the processor allow OOO issue as well + *as OOO completion, which means branch must be resolved before + *instruction issued into instruction window, since there is no change to + *flush miss-predict branch path after instructions are issued in this + *situation. 
+ * + * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. + * One old approach is to combine the RAT and ROB as a huge CAM structure + *as in AMD K7. However, this approach is abandoned due to its high power + *and poor scalability. McPAT uses current implementation of ROB as + *circular buffer. ROB is written once when instruction is issued and read + *once when the instruction is committed. * + */ + + int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); + data = int(ceil( + (robExtra + coredynp.pc_width + + ((coredynp.rm_ty == RAMbased) + ? (coredynp.phy_ireg_width + coredynp.phy_freg_width) + : fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width)) + + ((coredynp.scheu_ty == PhysicalRegFile) ? 0 + : coredynp.fp_data_width)) / + 8.0)); + /* + * 5 bits are: busy, Issued, Finished, speculative, valid; + * PC is to id the instruction for recover + * exception/mis-prediction. When using RAM-based RAT, ROB needs to + * contain the ARF-PRF mapping to index the correct entry in the RAT, so + * that the correct architecture register (and freelist) can be found and + * the RAT can be appropriately updated; otherwise, the RAM-based RAT + * needs to support search ops to identify the target architecture + * register that needs to be updated, or the physical resigner that needs + * to be recycled; When using CAM-based RAT, ROB only needs to contain + * destination physical register since the CAM-base RAT can search for the + * corresponding ARF-PRF mapping to find the correct entry in the RAT, so + * that the correct architecture register (and freelist/bits) can be found + * and the RAT can be appropriately updated. ROB phy_reg entry should use + * the larger one from phy_ireg and phy_freg; fdata_width is always + * larger. Latest Intel Processors may have different ROB/RS designs. + */ + + /* + if(coredynp.scheu_ty==PhysicalRegFile) + { + //PC is to id the instruction for recover + exception. + //inst is used to map the renamed dest. 
+ registers.so that commit stage can know which reg/RRAT to update + // data = + int(ceil((robExtra+coredynp.pc_width + + + // coredynp.instruction_length + + 2*coredynp.phy_ireg_width)/8.0)); + + if (coredynp.rm_ty ==RAMbased) + { + data = int(ceil((robExtra + + coredynp.pc_width + (coredynp.phy_ireg_width, + coredynp.phy_freg_width))/8.0)); + //When using RAM-based RAT, ROB + needs to contain the ARF-PRF mapping to index the correct entry in the + RAT, + //so that the correct architecture + register (and freelist) can be found and the RAT can be appropriately + updated. + } + else if ((coredynp.rm_ty ==CAMbased)) + { + data = + int(ceil((robExtra+coredynp.pc_width + fmax(coredynp.phy_ireg_width, + coredynp.phy_freg_width))/8.0)); + //When using CAM-based RAT, ROB + needs to contain the ARF-PRF mapping to index the correct entry in the + RAT, + //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated. + } + } + else + { + //in RS based OOO, ROB also contains value + of destination reg + // data = + int(ceil((robExtra+coredynp.pc_width + + + // coredynp.instruction_length + 2*coredynp.phy_ireg_width + + coredynp.fp_data_width)/8.0)); + + //using phy_reg number to search in the + RAT, the correct architecture register can be found and the RAT can be + appropriately updated. + //ROB phy_reg entry should use the larger + one from ireg and freg; fdata_width is always larger; Latest Intel + Processors may have different ROB/RS designs. 
data = int(ceil((robExtra + + coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width) + + coredynp.fp_data_width)/8.0)); + } + */ + + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore] + .ROB_size; // The XML ROB size is for all threads + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.peak_commitW; + interface_ip.num_wr_ports = coredynp.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + ROB->area.set_area(ROB->area.get_area() + + ROB->local_result.area * coredynp.num_pipelines); + area.set_area(area.get_area() + + ROB->local_result.area * coredynp.num_pipelines); + ROB_height = ROB->local_result.cache_ht; + } + + instruction_selection = new selection_logic( + is_default, XML->sys.core[ithCore].instruction_window_size, + coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); + } } -LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - LSQ(0), - LoadQ(0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false; - int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16; - - clockRate = coredynp.clockRate; - executionTime = 
coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; - - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - //Dcache - size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; - assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area); - area.set_area(area.get_area()+ dcache.caches->local_result.area); - //output_data_csv(dcache.caches.local_result); - - //dCache controllers - //miss buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area); - area.set_area(area.get_area()+ dcache.missb->local_result.area); - //output_data_csv(dcache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - 
interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area); - area.set_area(area.get_area()+ dcache.ifb->local_result.area); - //output_data_csv(dcache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area); - area.set_area(area.get_area()+ dcache.prefetchb->local_result.area); - //output_data_csv(dcache.prefetchb.local_result); - - //WBB - - if (cache_p==Write_back) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - 
interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area); - area.set_area(area.get_area()+ dcache.wbb->local_result.area); - //output_data_csv(dcache.wbb.local_result); - } - - /* - * LSU--in-order processors do not have separate load queue: unified lsq - * partitioned among threads - * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - */ - tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS; - data = XML->sys.machine_bits; - interface_ip.is_cache = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area); - 
area.set_area(area.get_area()+ LSQ->local_result.area); - //output_data_csv(LSQ.LSQ.local_result); - lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()+ LoadQ->local_result.area); - //output_data_csv(LoadQ.LoadQ.local_result); - lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - } - area.set_area(area.get_area()*cdb_overhead); +LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), LSQ(0), LoadQ(0), exist(exist_) { + if (!exist) + return; + int idx, tag, data, size, line, assoc, banks; + bool debug = false; + int ldst_opcode = 
XML->sys.core[ithCore].opcode_width; // 16; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; + + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + // Dcache + size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; + line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; + assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; + banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + tag = debug ? 51 + : XML->sys.physical_address_width - idx - int(ceil(log2(line))) + + EXTRA_TAG_BITS; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + debug ? 32768 : (int)XML->sys.core[ithCore].dcache.dcache_config[0]; + interface_ip.line_sz = + debug ? 64 : (int)XML->sys.core[ithCore].dcache.dcache_config[1]; + interface_ip.assoc = + debug ? 8 : (int)XML->sys.core[ithCore].dcache.dcache_config[2]; + interface_ip.nbanks = + debug ? 1 : (int)XML->sys.core[ithCore].dcache.dcache_config[3]; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 3.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.is_cache = true; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug + ? 1 + : XML->sys.core[ithCore] + .memory_ports; // usually In-order has 1 and OOO has 2 at least. 
+ interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, + coredynp.opt_local, coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.caches->local_result.area); + area.set_area(area.get_area() + dcache.caches->local_result.area); + // output_data_csv(dcache.caches.local_result); + + // dCache controllers + // miss buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + + dcache.caches->l_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[0] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 
1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.missb->local_result.area); + area.set_area(area.get_area() + dcache.missb->local_result.area); + // output_data_csv(dcache.missb.local_result); + + // fill buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = dcache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = data * XML->sys.core[ithCore].dcache.buffer_sizes[1]; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + dcache.ifb->local_result.area); + area.set_area(area.get_area() + dcache.ifb->local_result.area); + // output_data_csv(dcache.ifb.local_result); + + // prefetch buffer + tag = XML->sys.physical_address_width + + EXTRA_TAG_BITS; // check with previous entries to decide wthether to + // merge. 
+ data = dcache.caches->l_ip + .line_sz; // separate queue to prevent from cache polution. + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[2] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.prefetchb = + new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, + coredynp.opt_local, coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.prefetchb->local_result.area); + area.set_area(area.get_area() + dcache.prefetchb->local_result.area); + // output_data_csv(dcache.prefetchb.local_result); + + // WBB + + if (cache_p == Write_back) { + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = dcache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[3] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 
1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, + coredynp.opt_local, coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.wbb->local_result.area); + area.set_area(area.get_area() + dcache.wbb->local_result.area); + // output_data_csv(dcache.wbb.local_result); + } + + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both + * loadQ and StoreQ + */ + tag = ldst_opcode + XML->sys.virtual_address_width + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.machine_bits; + interface_ip.is_cache = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + XML->sys.core[ithCore].store_buffer_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + LSQ 
= new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, + coredynp.opt_local, coredynp.core_ty); + LSQ->area.set_area(LSQ->area.get_area() + LSQ->local_result.area); + area.set_area(area.get_area() + LSQ->local_result.area); + // output_data_csv(LSQ.LSQ.local_result); + lsq_height = + LSQ->local_result.cache_ht * + sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + XML->sys.core[ithCore].load_buffer_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, + coredynp.opt_local, coredynp.core_ty); + LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); + area.set_area(area.get_area() + LoadQ->local_result.area); + // output_data_csv(LoadQ.LoadQ.local_result); + lsq_height = + (LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht) * + sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + } + area.set_area(area.get_area() * cdb_overhead); } -MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - 
ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - itlb(0), - dtlb(0), - exist(exist_) -{ - if (!exist) return; - int tag, data; - bool debug= false; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.specific_tag = 1; - //Itlb TLBs are partioned among threads according to Nigara and Nehalem - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area); - area.set_area(area.get_area()+ itlb->local_result.area); - 
//output_data_csv(itlb.tlb.local_result); - - //dtlb - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area); - area.set_area(area.get_area()+ dtlb->local_result.area); - //output_data_csv(dtlb.tlb.local_result); +MemManU::MemManU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), itlb(0), dtlb(0), exist(exist_) { + if (!exist) + return; + int tag, data; + bool debug = false; + clockRate = coredynp.clockRate; + executionTime = 
coredynp.executionTime; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.specific_tag = 1; + // Itlb TLBs are partioned among threads according to Nigara and Nehalem + tag = XML->sys.virtual_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))) + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.physical_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))); + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].itlb.number_entries * + interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, + coredynp.core_ty); + itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area); + area.set_area(area.get_area() + itlb->local_result.area); + // output_data_csv(itlb.tlb.local_result); + + // dtlb + tag = XML->sys.virtual_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))) + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.physical_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].dtlb.number_entries * + interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 
1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, + coredynp.core_ty); + dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area); + area.set_area(area.get_area() + dtlb->local_result.area); + // output_data_csv(dtlb.tlb.local_result); } -RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IRF (0), - FRF (0), - RFWIN (0), - exist(exist_) - { - /* - * processors have separate architectural register files for each thread. - * therefore, the bypass buses need to travel across all the register files. 
- */ - - if (!exist) return; - int data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - //**********************************IRF*************************************** - data = coredynp.int_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. - interface_ip.num_rd_ports = 2*coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*coredynp.num_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)); - area.set_area(area.get_area()+ IRF->local_result.area*coredynp.num_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(IRF.RF.local_result); - - //**********************************FRF*************************************** - data = coredynp.fp_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz; - 
interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. - interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; - interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*coredynp.num_fp_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)); - area.set_area(area.get_area()+ FRF->local_result.area*coredynp.num_fp_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(FRF.RF.local_result); - int_regfile_height= IRF->local_result.cache_ht*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)*sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)*sqrt(cdb_overhead); - //since a EXU is associated with each pipeline, the cdb should not have longer length. 
- if (coredynp.regWindowing) - { - //*********************************REG_WIN************************************ - data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 4.0/clockRate; - interface_ip.latency = 4.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - //output_data_csv(RFWIN.RF.local_result); - } - - - } - -EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - lsq_height(lsq_height_), - coredynp(dyn_p_), - rfu(0), - scheu(0), - fp_u(0), - exeu(0), - mul(0), - int_bypass(0), - intTagBypass(0), - int_mul_bypass(0), - intTag_mul_Bypass(0), - fp_bypass(0), - fpTagBypass(0), - exist(exist_) -{ - bool exist_flag = true; - if (!exist) return; - double fu_height = 0.0; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - rfu = new RegFU(XML, ithCore, &interface_ip,coredynp); - scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp); - exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU); - area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() ); - fu_height = exeu->FU_height; - if (coredynp.num_fpus >0) - { - fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU); - area.set_area(area.get_area()+ fp_u->area.get_area()); - } - if (coredynp.num_muls >0) - { - mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL); - area.set_area(area.get_area()+ mul->area.get_area()); - fu_height += mul->FU_height; - } - /* - * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast - * integer by pass has two paths and fp has 3 paths. 
- * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus - */ - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used - interface_ip.wire_os_mat_type = 2; - interface_ip.throughput = 10.0/clockRate; //Do not care - interface_ip.latency = 10.0/clockRate; - } - - if (coredynp.core_ty==Inorder) - { - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() 
+intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - {//OOO - if (coredynp.scheu_ty==PhysicalRegFile) - { - /* For physical register based OOO, - * data broadcast interconnects cover across functional units, lsq, inst windows and register files, - * while tag broadcast interconnects also cover across ROB - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, 
coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - { - /* - * In RS based processor both data and tag are broadcast together, - * covering functional units, lsq, nst windows, register files, and ROBs - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass 
Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - - - } - area.set_area(area.get_area()+ bypass.area.get_area()); +RegFU::RegFU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), IRF(0), FRF(0), RFWIN(0), exist(exist_) { + /* + * processors have separate architectural register files for each thread. + * therefore, the bypass buses need to travel across all the register files. 
+ */ + + if (!exist) + return; + int data; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + //**********************************IRF*************************************** + data = coredynp.int_data_width; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.cache_sz = coredynp.num_IRF_entry * interface_ip.line_sz; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. + interface_ip.num_rd_ports = 2 * coredynp.peak_issueW; + interface_ip.num_wr_ports = coredynp.peak_issueW; + interface_ip.num_se_rd_ports = 0; + IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, + coredynp.opt_local, coredynp.core_ty); + IRF->area.set_area(IRF->area.get_area() + + IRF->local_result.area * coredynp.num_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + IRF->local_result.area * coredynp.num_pipelines * cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1)); + // area.set_area(area.get_area()*cdb_overhead); + // output_data_csv(IRF.RF.local_result); + + //**********************************FRF*************************************** + data = coredynp.fp_data_width; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.cache_sz = coredynp.num_FRF_entry * interface_ip.line_sz; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. + interface_ip.num_rd_ports = 2 * XML->sys.core[ithCore].issue_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; + interface_ip.num_se_rd_ports = 0; + FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, + coredynp.opt_local, coredynp.core_ty); + FRF->area.set_area(FRF->area.get_area() + + FRF->local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + FRF->local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + // area.set_area(area.get_area()*cdb_overhead); + // output_data_csv(FRF.RF.local_result); + int_regfile_height = IRF->local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); + fp_regfile_height = FRF->local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); + // since a EXU is associated with each pipeline, the cdb should not have + // longer length. + if (coredynp.regWindowing) { + //*********************************REG_WIN************************************ + data = + coredynp + .int_data_width; // ECC, and usually 2 regs are transfered together + // during window shifting.Niagara Mega cell + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 8.0)); + interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size * + IRF->l_ip.cache_sz * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 4.0 / clockRate; + interface_ip.latency = 4.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. 
+ interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, + coredynp.opt_local, coredynp.core_ty); + RFWIN->area.set_area(RFWIN->area.get_area() + + RFWIN->local_result.area * coredynp.num_pipelines); + area.set_area(area.get_area() + + RFWIN->local_result.area * coredynp.num_pipelines); + // output_data_csv(RFWIN.RF.local_result); + } } -RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - iFRAT(0), - fFRAT(0), - iRRAT(0), - fRRAT(0), - ifreeL(0), - ffreeL(0), - idcl(0), - fdcl(0), - RAHT(0), - exist(exist_) - { - /* - * Although renaming logic maybe be used in in-order processors, - * McPAT assumes no renaming logic is used since the performance gain is very limited and - * the only major inorder processor with renaming logic is Itainium - * that is a VLIW processor and different from current McPAT's model. - * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires. - * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all when context switch - * - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * RAM-based RAT is duplicated/partitioned for each different hardware threads - * CAM-based RAT is shared for all hardware threads - * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N (N-way SMT) sets of entries, with each set for a thread. 
- * The RAT control logic will determine different sets to use for different threads. But it does not need extra tag bits in the entries. - * However, CAM-based RAT need extra tag bits to distinguish the architecture register ids for different threads. - - * - * checkpointing of RAT and RRAT are both for architecture state recovery with events including mis-speculation; - * Checkpointing is easier to implement in CAM than in RAM based RAT, despite of the inferior scalabilty of the CAM-based RATs. - * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 checkpoints (based on MIPS designs) for RAM based RATs, - * thus CAM-based RAT does not need RRAT - * Although no Dual-RAT is needed in RS-based OOO processors, since archi RegFile contains the committed register values, - * a RRAT or GC (not both) will speedup the mis-speculation recovery. Thus, when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT. - * - * RAM-base RAT does not need to scan/search all contents during instruction commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is used for index the RAT entry to be updated. - * - * Both RAM and CAM have same DCL - * - - * - */ - if (!exist) return; - int tag, data, out_w; -// interface_ip.wire_is_mat_type = 0; -// interface_ip.wire_os_mat_type = 0; -// interface_ip.wt = Global_30; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if (coredynp.core_ty==OOO) - { - //integer pipeline - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explanation. 
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));//33; - out_w = int(ceil(coredynp.phy_ireg_width/8.0));//bytes - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT floating point - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port 
is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width + coredynp.hthread_width; - data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint )/8.0));//each checkpoint in the CAM-based RAT design needs only 1 bit, see "a power-aware hybrid ram-cam renaming mechanism for fast recovery" - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT for FP - tag = coredynp.arch_freg_width + coredynp.hthread_width; - data = int(ceil 
((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//each checkpoint in the CAM-based RAT design needs only 1 bit, see "a power-aware hybrid ram-cam renaming mechanism for fast recovery" - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - - //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping - //RRAT is not needed for CAM-based RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is not needed for RAM-based RAT with checkpoints - //McPAT assumes renaming unit to have RRAT when there is no checkpoints in FRAT, while MIPS R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with FRAT is very costly, especially for high issue width and high clock rate. 
- - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area); - area.set_area(area.get_area()+ iRRAT->area.get_area()); - - //RRAT for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - 
interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area); - area.set_area(area.get_area()+ fRRAT->area.get_area()); - } - //Freelist of renaming unit always RAM based and needed for RAM-based RATs. - //Although it can be implemented within the CAM-based RAT, - //Current McPAT does not have the free bits in the CAM but use the same external free list as a close approximation for CAM RAT. - //Recycle happens at two places: 1)when DCL check there are WAW, the Phy-registers/ROB directly recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; - //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers - interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, 
coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area); - area.set_area(area.get_area()+ ifreeL->area.get_area()); - - //freelist for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ffreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, "FP Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area); - area.set_area(area.get_area()+ ffreeL->area.get_area()); - - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); - - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased){ - - data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_ireg_width/8.0));//GC does not need to be readout - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*XML->sys.core[ithCore].number_hardware_threads; - 
interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->local_result.adjust_area(); -// iFRAT->local_result.power.readOp.dynamic *= 1+0.2*0.05;//1+mis-speculation% TODO -// iFRAT->local_result.power.writeOp.dynamic *=1+0.2*0.05;//compensate for GC - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FP - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", 
Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->local_result.adjust_area(); -// fFRAT->local_result.power.readOp.dynamic *= 1+0.2*0.05;//1+mis-speculation% TODO -// fFRAT->local_result.power.writeOp.dynamic *=1+0.2*0.05;//compensate for GC - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width + coredynp.hthread_width; - data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil (coredynp.arch_ireg_width/8.0));//GC bits does not need to be sent out - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT - tag = coredynp.arch_freg_width + coredynp.hthread_width; - data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil 
(coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - //Although no RRAT for RS based OOO is really needed since the archiRF always holds the non-speculative data, having the RRAT or GC (not both) can help the recovery of mis-speculations. 
- - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area); - area.set_area(area.get_area()+ iRRAT->area.get_area()); - - //RRAT for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - 
interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area); - area.set_area(area.get_area()+ fRRAT->area.get_area()); - } - - //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - //ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); - - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); - } +EXECU::EXECU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, double lsq_height_, + const CoreDynParam &dyn_p_, bool exist_) + : 
XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), fp_u(0), + exeu(0), mul(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), + intTag_mul_Bypass(0), fp_bypass(0), fpTagBypass(0), exist(exist_) { + bool exist_flag = true; + if (!exist) + return; + double fu_height = 0.0; + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + rfu = new RegFU(XML, ithCore, &interface_ip, coredynp); + scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); + exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); + area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + + scheu->area.get_area()); + fu_height = exeu->FU_height; + if (coredynp.num_fpus > 0) { + fp_u = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, FPU); + area.set_area(area.get_area() + fp_u->area.get_area()); + } + if (coredynp.num_muls > 0) { + mul = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, MUL); + area.set_area(area.get_area() + mul->area.get_area()); + fu_height += mul->FU_height; + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; + * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. 
on the + * same bus there are multiple tri-state drivers and muxes that go to + * different components on the same bus + */ + if (XML->sys.Embedded) { + interface_ip.wt = Global_30; + interface_ip.wire_is_mat_type = 0; + interface_ip.wire_os_mat_type = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + } else { + interface_ip.wt = Global; + interface_ip.wire_is_mat_type = + 2; // start from semi-global since local wires are already used + interface_ip.wire_os_mat_type = 2; + interface_ip.throughput = 10.0 / clockRate; // Do not care + interface_ip.latency = 10.0 / clockRate; + } + + if (coredynp.core_ty == Inorder) { + int_bypass = new interconnect( + "Int Bypass Data", Core_device, 1, 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32), + rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, + 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); + intTagBypass = new interconnect( + "Int Bypass tag", Core_device, 1, 1, coredynp.perThreadState, + rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu->Iw_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + + if (coredynp.num_muls > 0) { + int_mul_bypass = new interconnect( + "Mul Bypass Data", Core_device, 1, 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + intTag_mul_Bypass = new interconnect( + "Mul Bypass tag", Core_device, 1, 1, coredynp.perThreadState, + rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + 
bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + + if (coredynp.num_fpus > 0) { + fp_bypass = new interconnect( + "FP Bypass Data", Core_device, 1, 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, false, + 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); + fpTagBypass = new interconnect( + "FP Bypass tag", Core_device, 1, 1, coredynp.perThreadState, + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->Iw_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } else { // OOO + if (coredynp.scheu_ty == PhysicalRegFile) { + /* For physical register based OOO, + * data broadcast interconnects cover across functional units, lsq, inst + * windows and register files, while tag broadcast interconnects also + * cover across ROB + */ + int_bypass = new interconnect( + "Int Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, + 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_bypass->area.get_area()); + intTagBypass = new interconnect( + "Int Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + + if (coredynp.num_muls > 0) { + int_mul_bypass = new interconnect( + "Mul Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, 
coredynp.core_ty); + intTag_mul_Bypass = new interconnect( + "Mul Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + if (coredynp.num_fpus > 0) { + fp_bypass = new interconnect("FP Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u->FU_height, + &interface_ip, 3, false, 1.0, + coredynp.opt_local, coredynp.core_ty); + fpTagBypass = new interconnect( + "FP Bypass tag", Core_device, 1, 1, coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fp_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } else { + /* + * In RS based processor both data and tag are broadcast together, + * covering functional units, lsq, nst windows, register files, and ROBs + */ + int_bypass = new interconnect( + "Int Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + intTagBypass = new interconnect( + "Int Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_bypass->area.get_area()); + 
bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + if (coredynp.num_muls > 0) { + int_mul_bypass = new interconnect( + "Mul Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + intTag_mul_Bypass = new interconnect( + "Mul Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + + if (coredynp.num_fpus > 0) { + fp_bypass = new interconnect( + "FP Bypass Data", Core_device, 1, 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + fpTagBypass = new interconnect( + "FP Bypass tag", Core_device, 1, 1, coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fp_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } + } + area.set_area(area.get_area() + bypass.area.get_area()); } - if (coredynp.core_ty==Inorder&& coredynp.issueW>1) - { - /* Dependency check logic will only present when decode(issue) width>1. - * Multiple issue in order processor can do without renaming, but dcl is a must. 
- */ - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + +RENAMINGU::RENAMINGU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), iFRAT(0), fFRAT(0), iRRAT(0), fRRAT(0), ifreeL(0), + ffreeL(0), idcl(0), fdcl(0), RAHT(0), exist(exist_) { + /* + * Although renaming logic maybe be used in in-order processors, +* McPAT assumes no renaming logic is used since the performance gain is very +limited and +* the only major inorder processor with renaming logic is Itainium +* that is a VLIW processor and different from current McPAT's model. + * physical register base OOO must have Dual-RAT architecture or equivalent +structure.FRAT:FrontRAT, RRAT:RetireRAT; + * i,f prefix mean int and fp + * RAT for all Renaming logic, random accessible checkpointing is used, but +only update when instruction retires. + * FRAT will be read twice and written once per instruction; + * RRAT will be write once per instruction when committing and reads out all +when context switch + * + * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, + * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, + * + * RAM-based RAT is duplicated/partitioned for each different hardware threads + * CAM-based RAT is shared for all hardware threads + * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N +(N-way SMT) sets of entries, with each set for a thread. + * The RAT control logic will determine different sets to use for different +threads. But it does not need extra tag bits in the entries. + * However, CAM-based RAT need extra tag bits to distinguish the architecture +register ids for different threads. 
+ + * + * checkpointing of RAT and RRAT are both for architecture state recovery with +events including mis-speculation; + * Checkpointing is easier to implement in CAM than in RAM based RAT, despite +of the inferior scalabilty of the CAM-based RATs. + * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 +checkpoints (based on MIPS designs) for RAM based RATs, + * thus CAM-based RAT does not need RRAT + * Although no Dual-RAT is needed in RS-based OOO processors, since archi +RegFile contains the committed register values, + * a RRAT or GC (not both) will speedup the mis-speculation recovery. Thus, +when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT. + * + * RAM-base RAT does not need to scan/search all contents during instruction +commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is +used for index the RAT entry to be updated. + * + * Both RAM and CAM have same DCL + * + + * + */ + if (!exist) + return; + int tag, data, out_w; + // interface_ip.wire_is_mat_type = 0; + // interface_ip.wire_os_mat_type = 0; + // interface_ip.wt = Global_30; + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + if (coredynp.core_ty == OOO) { + // integer pipeline + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == + RAMbased) { // FRAT with global checkpointing (GCs) please see paper + // tech report for detailed explanation. 
+ data = int(ceil(coredynp.phy_ireg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); // 33; + out_w = int(ceil(coredynp.phy_ireg_width / 8.0)); // bytes + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_IRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // FRAT floating point + data = int(ceil(coredynp.phy_freg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_FRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + 
interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((coredynp.rm_ty == CAMbased)) { + // FRAT + tag = coredynp.arch_ireg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // each checkpoint in the CAM-based RAT design needs + // only 1 bit, see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + out_w = int(ceil(coredynp.arch_ireg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.decodeW; + iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // 
FRAT for FP + tag = coredynp.arch_freg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // each checkpoint in the CAM-based RAT design needs + // only 1 bit, see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + out_w = int(ceil(coredynp.arch_freg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; + fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + } + + // RRAT is always RAM based, does not have GCs, and is used only for + // record latest non-speculative mapping RRAT is not needed for CAM-based + // RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is + // not needed for RAM-based RAT with checkpoints McPAT assumes renaming + // unit to have RRAT when there is no checkpoints in FRAT, while MIPS + // R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with + // FRAT is very costly, especially for high issue width and high clock + // 
rate. + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + + // RRAT for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 
1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); + } + // Freelist of renaming unit always RAM based and needed for RAM-based + // RATs. Although it can be implemented within the CAM-based RAT, Current + // McPAT does not have the free bits in the CAM but use the same external + // free list as a close approximation for CAM RAT. Recycle happens at two + // places: 1)when DCL check there are WAW, the Phy-registers/ROB directly + // recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + // therefore num_wr port = decode-1(-1 means at least one phy reg will be + // used for the current renaming group) + commit width + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // TODO + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = + coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; + // every cycle, (coredynp.decodeW -1) inst may need to send back it dest + // tags, committW insts needs to update freelist buffers + 
interface_ip.num_se_rd_ports = 0; + ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, + coredynp.opt_local, coredynp.core_ty); + ifreeL->area.set_area(ifreeL->area.get_area() + + ifreeL->local_result.area); + area.set_area(area.get_area() + ifreeL->area.get_area()); + + // freelist for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ffreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = + coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + ffreeL = new ArrayST(&interface_ip, "FP Free List", Core_device, + coredynp.opt_local, coredynp.core_ty); + ffreeL->area.set_area(ffreeL->area.get_area() + + ffreeL->local_result.area); + area.set_area(area.get_area() + ffreeL->area.get_area()); + + idcl = new dep_resource_conflict_check( + &interface_ip, coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, + coredynp.phy_freg_width); + + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + + data = int(ceil(coredynp.phy_ireg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_ireg_width / + 8.0)); // GC does not need to be readout + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + 
interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_IRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iFRAT->local_result.adjust_area(); + // iFRAT->local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // iFRAT->local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // FP + data = int(ceil(coredynp.phy_freg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_FRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one 
port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fFRAT->local_result.adjust_area(); + // fFRAT->local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // fFRAT->local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((coredynp.rm_ty == CAMbased)) { + // FRAT + tag = coredynp.arch_ireg_width + coredynp.hthread_width; + data = int(ceil( + (coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.arch_ireg_width / + 8.0)); // GC bits does not need to be sent out + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.decodeW; + iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + 
iFRAT->area.get_area()); + + // FRAT + tag = coredynp.arch_freg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // the address of CAM needed to be sent out + out_w = int(ceil(coredynp.arch_freg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = + XML->sys.core[ithCore].decode_width; // 0;TODO; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; + fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + } + // Although no RRAT for RS based OOO is really needed since the archiRF + // always holds the non-speculative data, having the RRAT or GC (not both) + // can help the recovery of mis-speculations. 
+ + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + + // RRAT for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + 
interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, + coredynp.opt_local, coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); + } + + // Freelist of renaming unit of RS based OOO is unifed for both int and fp + // renaming unit since the ROB is unified + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // TODO + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = + coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, + coredynp.opt_local, coredynp.core_ty); + // ifreeL->area.set_area(ifreeL->area.get_area()+ + // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); + area.set_area(area.get_area() + ifreeL->area.get_area()); + + idcl = new dep_resource_conflict_check( + &interface_ip, coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, + coredynp.phy_freg_width); } + } + if (coredynp.core_ty == Inorder && coredynp.issueW > 1) { + /* Dependency check 
logic will only present when decode(issue) width>1. + * Multiple issue in order processor can do without renaming, but dcl is a + * must. + */ + idcl = new dep_resource_conflict_check( + &interface_ip, coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, + coredynp.phy_freg_width); + } } -Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - ifu (0), - lsu (0), - mmu (0), - exu (0), - rnu (0), - corepipe (0), - undiffCore (0), - l2cache (0) -{ - /* - * initialize, compute and optimize individual components. - */ +Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + ifu(0), lsu(0), mmu(0), exu(0), rnu(0), corepipe(0), undiffCore(0), + l2cache(0) { + /* + * initialize, compute and optimize individual components. 
+ */ bool exit_flag = true; @@ -1836,2602 +2208,3562 @@ Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) // interface_ip.wt =Global_30; set_core_param(); - if (XML->sys.Private_L2) - { - l2cache = new SharedCache(XML,ithCore, &interface_ip); - + if (XML->sys.Private_L2) { + l2cache = new SharedCache(XML, ithCore, &interface_ip); } clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp,exit_flag); - lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp,exit_flag); - mmu = new MemManU (XML, ithCore, &interface_ip,coredynp,exit_flag); - exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp,exit_flag); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp,exit_flag); - if (coredynp.core_ty==OOO) - { - rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp); + ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); + lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); + mmu = new MemManU(XML, ithCore, &interface_ip, coredynp, exit_flag); + exu = new EXECU(XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, + exit_flag); + undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); + if (coredynp.core_ty == OOO) { + rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp); } - corepipe = new Pipeline(&interface_ip,coredynp); - - if (coredynp.core_ty==OOO) - { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0; - if (rnu->exist) - { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); - } - } - else { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0; + corepipe = new Pipeline(&interface_ip, coredynp); + + if (coredynp.core_ty == OOO) { + pipeline_area_per_unit = + (corepipe->area.get_area() * coredynp.num_pipelines) / 5.0; + if (rnu->exist) { + rnu->area.set_area(rnu->area.get_area() + 
pipeline_area_per_unit); + } + } else { + pipeline_area_per_unit = + (corepipe->area.get_area() * coredynp.num_pipelines) / 4.0; } - //area.set_area(area.get_area()+ corepipe->area.get_area()); - if (ifu->exist) - { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); + // area.set_area(area.get_area()+ corepipe->area.get_area()); + if (ifu->exist) { + ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu->area.get_area()); } - if (lsu->exist) - { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); + if (lsu->exist) { + lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu->area.get_area()); } - if (exu->exist) - { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+exu->area.get_area()); + if (exu->exist) { + exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu->area.get_area()); } - if (mmu->exist) - { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+mmu->area.get_area()); + if (mmu->exist) { + mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + mmu->area.get_area()); } - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { + if (coredynp.core_ty == OOO) { + if (rnu->exist) { - area.set_area(area.get_area() + rnu->area.get_area()); - } + area.set_area(area.get_area() + rnu->area.get_area()); + } } - if (undiffCore->exist) - { - area.set_area(area.get_area() + undiffCore->area.get_area()); + if (undiffCore->exist) { + area.set_area(area.get_area() + undiffCore->area.get_area()); } - if (XML->sys.Private_L2) - { - area.set_area(area.get_area() + l2cache->area.get_area()); + if (XML->sys.Private_L2) { + 
area.set_area(area.get_area() + l2cache->area.get_area()); + } + // //clock power + // clockNetwork.init_wire_external(is_default, &interface_ip); + // clockNetwork.clk_area =area*1.1;//10% of placement overhead. + // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal + // clockNetwork.start_wiring_level =5;//toplevel metal + // clockNetwork.num_regs = corepipe.tot_stage_vector; + // clockNetwork.optimize_wire(); +} + +void BranchPredictor::computeEnergy(bool is_tdp) { + if (!exist) + return; + double r_access; + double w_access; + if (is_tdp) { + r_access = coredynp.predictionW * coredynp.BR_duty_cycle; + w_access = 0 * coredynp.BR_duty_cycle; + globalBPT->stats_t.readAc.access = r_access; + globalBPT->stats_t.writeAc.access = w_access; + globalBPT->tdp_stats = globalBPT->stats_t; + + L1_localBPT->stats_t.readAc.access = r_access; + L1_localBPT->stats_t.writeAc.access = w_access; + L1_localBPT->tdp_stats = L1_localBPT->stats_t; + + L2_localBPT->stats_t.readAc.access = r_access; + L2_localBPT->stats_t.writeAc.access = w_access; + L2_localBPT->tdp_stats = L2_localBPT->stats_t; + + chooser->stats_t.readAc.access = r_access; + chooser->stats_t.writeAc.access = w_access; + chooser->tdp_stats = chooser->stats_t; + + RAS->stats_t.readAc.access = r_access; + RAS->stats_t.writeAc.access = w_access; + RAS->tdp_stats = RAS->stats_t; + } else { + // The resolution of BPT accesses is coarse, but this is + // because most simulators cannot track finer grained details + r_access = XML->sys.core[ithCore].branch_instructions; + w_access = + XML->sys.core[ithCore].branch_mispredictions + + 0.1 * XML->sys.core[ithCore] + .branch_instructions; // 10% of BR will flip internal bits//0 + globalBPT->stats_t.readAc.access = r_access; + globalBPT->stats_t.writeAc.access = w_access; + globalBPT->rtp_stats = globalBPT->stats_t; + + L1_localBPT->stats_t.readAc.access = r_access; + L1_localBPT->stats_t.writeAc.access = w_access; + L1_localBPT->rtp_stats = L1_localBPT->stats_t; + + 
L2_localBPT->stats_t.readAc.access = r_access; + L2_localBPT->stats_t.writeAc.access = w_access; + L2_localBPT->rtp_stats = L2_localBPT->stats_t; + + chooser->stats_t.readAc.access = r_access; + chooser->stats_t.writeAc.access = w_access; + chooser->rtp_stats = chooser->stats_t; + + RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; + RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; + RAS->rtp_stats = RAS->stats_t; + } + globalBPT->power_t.reset(); + L1_localBPT->power_t.reset(); + L2_localBPT->power_t.reset(); + chooser->power_t.reset(); + RAS->power_t.reset(); + + globalBPT->power_t.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->stats_t.readAc.access + + globalBPT->stats_t.writeAc.access * + globalBPT->local_result.power.writeOp.dynamic; + L1_localBPT->power_t.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->stats_t.readAc.access + + L1_localBPT->stats_t.writeAc.access * + L1_localBPT->local_result.power.writeOp.dynamic; + + L2_localBPT->power_t.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->stats_t.readAc.access + + L2_localBPT->stats_t.writeAc.access * + L2_localBPT->local_result.power.writeOp.dynamic; + + chooser->power_t.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->stats_t.readAc.access + + chooser->stats_t.writeAc.access * + chooser->local_result.power.writeOp.dynamic; + RAS->power_t.readOp.dynamic += + RAS->local_result.power.readOp.dynamic * RAS->stats_t.readAc.access + + RAS->stats_t.writeAc.access * RAS->local_result.power.writeOp.dynamic; + + if (is_tdp) { + globalBPT->power = + globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; + L1_localBPT->power = + L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; + L2_localBPT->power = + L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; + chooser->power = chooser->power_t + 
chooser->local_result.power * pppm_lkg; + RAS->power = + RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; + + power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + + chooser->power + RAS->power; + } else { + globalBPT->rt_power = + globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; + L1_localBPT->rt_power = + L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; + L2_localBPT->rt_power = + L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; + chooser->rt_power = + chooser->power_t + chooser->local_result.power * pppm_lkg; + RAS->rt_power = + RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; + rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + + L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power; } -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = corepipe.tot_stage_vector; -// clockNetwork.optimize_wire(); } +void BranchPredictor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + if (is_tdp) { + cout << indent_str << "Global Predictor:" << endl; + cout << indent_str_next << "Area = " << globalBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << globalBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
globalBPT->power.readOp.longer_channel_leakage + : globalBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? globalBPT->power.readOp + .power_gated_with_long_channel_leakage + : globalBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << globalBPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Local Predictor:" << endl; + cout << indent_str << "L1_Local Predictor:" << endl; + cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? L1_localBPT->power.readOp.longer_channel_leakage + : L1_localBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? L1_localBPT->power.readOp + .power_gated_with_long_channel_leakage + : L1_localBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << L1_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "L2_Local Predictor:" << endl; + cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
L2_localBPT->power.readOp.longer_channel_leakage + : L2_localBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? L2_localBPT->power.readOp + .power_gated_with_long_channel_leakage + : L2_localBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << L2_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + + cout << indent_str << "Chooser:" << endl; + cout << indent_str_next << "Area = " << chooser->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << chooser->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? chooser->power.readOp.longer_channel_leakage + : chooser->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? chooser->power.readOp.power_gated_with_long_channel_leakage + : chooser->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << chooser->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "RAS:" << endl; + cout << indent_str_next << "Area = " << RAS->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << RAS->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
RAS->power.readOp.longer_channel_leakage + : RAS->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? RAS->power.readOp.power_gated_with_long_channel_leakage + : RAS->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + } else { + // cout << indent_str_next << "Global Predictor Peak Dynamic = " + //<< globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "Global Predictor Subthreshold Leakage = " + // << globalBPT->rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next + //<< "Global Predictor Gate Leakage = " << + // globalBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // << indent_str_next << "Local Predictor Peak Dynamic = " << + // L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Local Predictor Subthreshold Leakage = " << + // L1_localBPT->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next << "Local Predictor Gate Leakage = " << + // L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // << indent_str_next << "Chooser Peak Dynamic = " << + // chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Chooser Subthreshold Leakage = " << + // chooser->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + //<< "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << + //" W" << endl; cout << indent_str_next << "RAS Peak Dynamic = " + //<< RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "RAS Subthreshold Leakage = " << + // RAS->rt_power.readOp.leakage << " W" << endl; cout << + // 
indent_str_next + // << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" + //<< endl; + } +} -void BranchPredictor::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double r_access; - double w_access; - if (is_tdp) - { - r_access = coredynp.predictionW*coredynp.BR_duty_cycle; - w_access = 0*coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; +void InstFetchU::computeEnergy(bool is_tdp) { + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + icache.caches->stats_t.readAc.access = + icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; + icache.caches->stats_t.readAc.miss = 0; + icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - + icache.caches->stats_t.readAc.miss; + icache.caches->tdp_stats = icache.caches->stats_t; + + icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = + icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = + icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb->tdp_stats = icache.missb->stats_t; + + icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = + icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = 
+ icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb->tdp_stats = icache.ifb->stats_t; + + icache.prefetchb->stats_t.readAc.access = + icache.prefetchb->stats_t.readAc.hit = + icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = + icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; + + IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + XML->sys.core[ithCore].peak_issue_width; + IB->tdp_stats = IB->stats_t; + + if (coredynp.predictionW > 0) { + BTB->stats_t.readAc.access = + coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; + BTB->stats_t.writeAc.access = + 0; // XML->sys.core[ithCore].BTB.write_accesses; } - else - { - //The resolution of BPT accesses is coarse, but this is - //because most simulators cannot track finer grained details - r_access = XML->sys.core[ithCore].branch_instructions; - w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; - } - - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - 
L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access + - L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + chooser->power + RAS->power; + ID_inst->stats_t.readAc.access = coredynp.decodeW; + ID_operand->stats_t.readAc.access = coredynp.decodeW; + ID_misc->stats_t.readAc.access = coredynp.decodeW; + ID_inst->tdp_stats = ID_inst->stats_t; + ID_operand->tdp_stats = ID_operand->stats_t; + ID_misc->tdp_stats = ID_misc->stats_t; + + } else { + // init stats for Runtime Dynamic (RTP) + 
icache.caches->stats_t.readAc.access = + XML->sys.core[ithCore].icache.read_accesses; + icache.caches->stats_t.readAc.miss = + XML->sys.core[ithCore].icache.read_misses; + icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - + icache.caches->stats_t.readAc.miss; + icache.caches->rtp_stats = icache.caches->stats_t; + + icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; + icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; + icache.missb->rtp_stats = icache.missb->stats_t; + + icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; + icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; + icache.ifb->rtp_stats = icache.ifb->stats_t; + + icache.prefetchb->stats_t.readAc.access = + icache.caches->stats_t.readAc.miss; + icache.prefetchb->stats_t.writeAc.access = + icache.caches->stats_t.readAc.miss; + icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; + + IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + XML->sys.core[ithCore].total_instructions; + IB->rtp_stats = IB->stats_t; + + if (coredynp.predictionW > 0) { + BTB->stats_t.readAc.access = + XML->sys.core[ithCore] + .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; + BTB->stats_t.writeAc.access = + XML->sys.core[ithCore] + .BTB + .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; + BTB->rtp_stats = BTB->stats_t; } - else - { - globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + L2_localBPT->rt_power + chooser->rt_power + 
RAS->rt_power; + + ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_operand->stats_t.readAc.access = + XML->sys.core[ithCore].total_instructions; + ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_inst->rtp_stats = ID_inst->stats_t; + ID_operand->rtp_stats = ID_operand->stats_t; + ID_misc->rtp_stats = ID_misc->stats_t; + } + + icache.power_t.reset(); + IB->power_t.reset(); + // ID_inst->power_t.reset(); + // ID_operand->power_t.reset(); + // ID_misc->power_t.reset(); + if (coredynp.predictionW > 0) { + BTB->power_t.reset(); + } + + icache.power_t.readOp.dynamic += + (icache.caches->stats_t.readAc.hit * + icache.caches->local_result.power.readOp.dynamic + + // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ + icache.caches->stats_t.readAc.miss * + icache.caches->local_result.power.readOp + .dynamic + // assume tag data accessed in parallel + icache.caches->stats_t.readAc.miss * + icache.caches->local_result.power.writeOp + .dynamic); // read miss in Icache cause a write to Icache + icache.power_t.readOp.dynamic += + icache.missb->stats_t.readAc.access * + icache.missb->local_result.power.searchOp.dynamic + + icache.missb->stats_t.writeAc.access * + icache.missb->local_result.power.writeOp + .dynamic; // each access to missb involves a CAM and a write + icache.power_t.readOp.dynamic += + icache.ifb->stats_t.readAc.access * + icache.ifb->local_result.power.searchOp.dynamic + + icache.ifb->stats_t.writeAc.access * + icache.ifb->local_result.power.writeOp.dynamic; + icache.power_t.readOp.dynamic += + icache.prefetchb->stats_t.readAc.access * + icache.prefetchb->local_result.power.searchOp.dynamic + + icache.prefetchb->stats_t.writeAc.access * + icache.prefetchb->local_result.power.writeOp.dynamic; + + IB->power_t.readOp.dynamic += + IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + + IB->stats_t.writeAc.access * 
IB->local_result.power.writeOp.dynamic; + + if (coredynp.predictionW > 0) { + BTB->power_t.readOp.dynamic += + BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + + BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; + + BPT->computeEnergy(is_tdp); + } + + if (is_tdp) { + // icache.power = icache.power_t + + // (icache.caches->local_result.power)*pppm_lkg + + // (icache.missb->local_result.power + + // icache.ifb->local_result.power + + // icache.prefetchb->local_result.power)*pppm_Isub; + icache.power = icache.power_t + (icache.caches->local_result.power + + icache.missb->local_result.power + + icache.ifb->local_result.power + + icache.prefetchb->local_result.power) * + pppm_lkg; + + IB->power = IB->power_t + IB->local_result.power * pppm_lkg; + power = power + icache.power + IB->power; + if (coredynp.predictionW > 0) { + BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; + power = power + BTB->power + BPT->power; + } + + ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; + ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; + ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; + + ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; + ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; + ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; + + power = power + (ID_inst->power + ID_operand->power + ID_misc->power); + } else { + // icache.rt_power = icache.power_t + + // (icache.caches->local_result.power)*pppm_lkg + + // (icache.missb->local_result.power + + // icache.ifb->local_result.power + + // icache.prefetchb->local_result.power)*pppm_Isub; + + icache.rt_power = icache.power_t + (icache.caches->local_result.power + + icache.missb->local_result.power + + icache.ifb->local_result.power + + icache.prefetchb->local_result.power) * + pppm_lkg; + + IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; + rt_power 
= rt_power + icache.rt_power + IB->rt_power; + if (coredynp.predictionW > 0) { + BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; + rt_power = rt_power + BTB->rt_power + BPT->rt_power; } -} -void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - if (is_tdp) - { - cout << indent_str<< "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
globalBPT->power.readOp.power_gated_with_long_channel_leakage : globalBPT->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <rt_power.readOp.dynamic = + ID_inst->power_t.readOp.dynamic * ID_inst->rtp_stats.readAc.access; + ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * + ID_operand->rtp_stats.readAc.access; + ID_misc->rt_power.readOp.dynamic = + ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; + rt_power = rt_power + + (ID_inst->rt_power + ID_operand->rt_power + ID_misc->rt_power); + } } -void InstFetchU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->tdp_stats = icache.caches->stats_t; +void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + + cout << indent_str << "Instruction Cache:" << endl; + cout << indent_str_next << "Area = " << icache.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << icache.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
icache.power.readOp.longer_channel_leakage + : icache.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? icache.power.readOp.power_gated_with_long_channel_leakage + : icache.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << icache.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (coredynp.predictionW > 0) { + cout << indent_str << "Branch Target Buffer:" << endl; + cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? BTB->power.readOp.longer_channel_leakage + : BTB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? BTB->power.readOp.power_gated_with_long_channel_leakage + : BTB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (BPT->exist) { + cout << indent_str << "Branch Predictor:" << endl; + cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
BPT->power.readOp.longer_channel_leakage + : BPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? BPT->power.readOp.power_gated_with_long_channel_leakage + : BPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 3) { + BPT->displayEnergy(indent + 4, plevel, is_tdp); + } + } + } + cout << indent_str << "Instruction Buffer:" << endl; + cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? IB->power.readOp.longer_channel_leakage + : IB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
IB->power.readOp.power_gated_with_long_channel_leakage + : IB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + cout << indent_str << "Instruction Decoder:" << endl; + cout << indent_str_next << "Area = " + << (ID_inst->area.get_area() + ID_operand->area.get_area() + + ID_misc->area.get_area()) * + coredynp.decodeW * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << (ID_inst->power.readOp.dynamic + ID_operand->power.readOp.dynamic + + ID_misc->power.readOp.dynamic) * + clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? (ID_inst->power.readOp.longer_channel_leakage + + ID_operand->power.readOp.longer_channel_leakage + + ID_misc->power.readOp.longer_channel_leakage) + : (ID_inst->power.readOp.leakage + + ID_operand->power.readOp.leakage + + ID_misc->power.readOp.leakage)) + << " W" << endl; + + double tot_leakage = + (ID_inst->power.readOp.leakage + ID_operand->power.readOp.leakage + + ID_misc->power.readOp.leakage); + double tot_leakage_longchannel = + (ID_inst->power.readOp.longer_channel_leakage + + ID_operand->power.readOp.longer_channel_leakage + + ID_misc->power.readOp.longer_channel_leakage); + double tot_leakage_pg = (ID_inst->power.readOp.power_gated_leakage + + ID_operand->power.readOp.power_gated_leakage + + ID_misc->power.readOp.power_gated_leakage); + double tot_leakage_pg_with_long_channel = + (ID_inst->power.readOp.power_gated_with_long_channel_leakage + + ID_operand->power.readOp.power_gated_with_long_channel_leakage + + ID_misc->power.readOp.power_gated_with_long_channel_leakage); + + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
tot_leakage_pg_with_long_channel : tot_leakage_pg) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " + << (ID_inst->power.readOp.gate_leakage + + ID_operand->power.readOp.gate_leakage + + ID_misc->power.readOp.gate_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << (ID_inst->rt_power.readOp.dynamic + + ID_operand->rt_power.readOp.dynamic + + ID_misc->rt_power.readOp.dynamic) / + executionTime + << " W" << endl; + cout << endl; + } else { + // cout << indent_str_next << "Instruction Cache Peak Dynamic = " + //<< icache.rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " + // << icache.rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next << "Instruction Cache Gate Leakage = " << + // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Instruction Buffer Peak Dynamic = " << + // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << + // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << + // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next + // << "Instruction Buffer Gate Leakage = " << + // IB->rt_power.readOp.gate_leakage + //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer + // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << + // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold + // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout + // << indent_str_next << "Branch Target Buffer Gate Leakage = " << + // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Branch Predictor Peak Dynamic = " << + // BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << + // BPT->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Branch Predictor Gate Leakage = 
" << + // BPT->rt_power.readOp.gate_leakage + //<< " W" << endl; + } +} - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.missb->tdp_stats = icache.missb->stats_t; +void RENAMINGU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double pppm_t[4] = {1, 1, 1, 1}; + if (is_tdp) { // init stats for Peak + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; + } + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports;; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; + + ffreeL->stats_t.readAc.access = + coredynp.decodeW; // 
ffreeL->l_ip.num_rd_ports; + ffreeL->stats_t.writeAc.access = + coredynp.decodeW; // ffreeL->l_ip.num_wr_ports; + ffreeL->tdp_stats = ffreeL->stats_t; + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + } + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; + } + // Unified free list for both int and fp + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; + } + idcl->stats_t.readAc.access = coredynp.decodeW; + fdcl->stats_t.readAc.access = coredynp.decodeW; + idcl->tdp_stats = idcl->stats_t; + fdcl->tdp_stats = fdcl->stats_t; + } else { + if (coredynp.issueW > 1) { + idcl->stats_t.readAc.access = coredynp.decodeW; + fdcl->stats_t.readAc.access = coredynp.decodeW; + idcl->tdp_stats = idcl->stats_t; + fdcl->tdp_stats = fdcl->stats_t; + } + } - 
icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.ifb->tdp_stats = icache.ifb->stats_t; + } else { // init stats for Runtime Dynamic (RTP) + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .fp_rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + fRRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; + } + ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + ifreeL->stats_t.writeAc.access = + 2 * XML->sys.core[ithCore].rename_writes; 
+ ifreeL->rtp_stats = ifreeL->stats_t; + + ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + ffreeL->stats_t.writeAc.access = + 2 * XML->sys.core[ithCore].fp_rename_writes; + ffreeL->rtp_stats = ffreeL->stats_t; + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + // iFRAT->stats_t.searchAc.access = + // XML->sys.core[ithCore].committed_int_instructions;//hack: not all + // committed instructions use regs. + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + // fFRAT->stats_t.searchAc.access = + // XML->sys.core[ithCore].committed_fp_instructions; + fFRAT->rtp_stats = fFRAT->stats_t; + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .fp_rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + fRRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; + } + // Unified free list for both int and fp since 
the ROB act as physcial + // registers + ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + + XML->sys.core[ithCore].fp_rename_reads; + ifreeL->stats_t.writeAc.access = + 2 * (XML->sys.core[ithCore].rename_writes + + XML->sys.core[ithCore] + .fp_rename_writes); // HACK: 2-> since some of renaming in + // the same group are terminated early + ifreeL->rtp_stats = ifreeL->stats_t; + } + idcl->stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * + XML->sys.core[ithCore].rename_reads; + fdcl->stats_t.readAc.access = 3 * coredynp.fp_issueW * + coredynp.fp_issueW * + XML->sys.core[ithCore].fp_rename_writes; + idcl->rtp_stats = idcl->stats_t; + fdcl->rtp_stats = fdcl->stats_t; + } else { + if (coredynp.issueW > 1) { + idcl->stats_t.readAc.access = + 2 * XML->sys.core[ithCore].int_instructions; + fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; + idcl->rtp_stats = idcl->stats_t; + fdcl->rtp_stats = fdcl->stats_t; + } + } + } + /* Compute engine */ + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + 
(fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); + } - icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; + ifreeL->power_t.reset(); + ffreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); + ffreeL->power_t.readOp.dynamic += + (ffreeL->stats_t.readAc.access * + ffreeL->local_result.power.readOp.dynamic + + ffreeL->stats_t.writeAc.access * + ffreeL->local_result.power.writeOp.dynamic); + + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + 
+ fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } else if ((coredynp.rm_ty == CAMbased)) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); + } - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses; - } + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); + } - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = 
ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; + } else { + if (coredynp.issueW > 1) { + idcl->power_t.reset(); + fdcl->power_t.reset(); + set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, + coredynp.num_hthreads, idcl->stats_t.readAc.access); + idcl->power_t = idcl->power * pppm_t; + set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, + coredynp.num_hthreads, idcl->stats_t.readAc.access); + fdcl->power_t = fdcl->power * pppm_t; + } + } + // assign value to tpd and rtp + if (is_tdp) { + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->power = ffreeL->power_t + ffreeL->local_result.power; + power = power + + (iFRAT->power + fFRAT->power) + //+ (iRRAT->power + fRRAT->power) + + (ifreeL->power + ffreeL->power); + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); + } + } else if (coredynp.scheu_ty == ReservationStation) { + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + power = power + (iFRAT->power + fFRAT->power) + ifreeL->power; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); + } + } + } else { + power = power + idcl->power_t + fdcl->power_t; + } + } else { + if (coredynp.core_ty == OOO) { + if 
(coredynp.scheu_ty == PhysicalRegFile) { + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power; + rt_power = rt_power + + (iFRAT->rt_power + fFRAT->rt_power) + // + (iRRAT->rt_power + + // fRRAT->rt_power) + + (ifreeL->rt_power + ffreeL->rt_power); + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + } + } else if (coredynp.scheu_ty == ReservationStation) { + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + rt_power = + rt_power + (iFRAT->rt_power + fFRAT->rt_power) + ifreeL->rt_power; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + } + } + } else { + rt_power = rt_power + idcl->power_t + fdcl->power_t; } - else - { - //init stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = 
icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; - - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; - } - - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; + } +} +void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + + if (coredynp.core_ty == OOO) { + cout << indent_str << "Int Front End RAT with " + << coredynp.globalCheckpoint << " internal checkpoints:" << endl; + cout << indent_str_next << "Area = " << iFRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << iFRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << 
indent_str_next << "Subthreshold Leakage = " + << (long_channel ? iFRAT->power.readOp.longer_channel_leakage + : iFRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? iFRAT->power.readOp.power_gated_with_long_channel_leakage + : iFRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << iFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "FP Front End RAT with " + << coredynp.globalCheckpoint << " internal checkpoints:" << endl; + cout << indent_str_next << "Area = " << fFRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fFRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? fFRAT->power.readOp.longer_channel_leakage + : fFRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? fFRAT->power.readOp.power_gated_with_long_channel_leakage + : fFRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Free List:" << endl; + cout << indent_str_next << "Area = " << ifreeL->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ifreeL->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
ifreeL->power.readOp.longer_channel_leakage + : ifreeL->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? ifreeL->power.readOp.power_gated_with_long_channel_leakage + : ifreeL->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ifreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + cout << indent_str << "Int Retire RAT: " << endl; + cout << indent_str_next << "Area = " << iRRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << iRRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? iRRAT->power.readOp.longer_channel_leakage + : iRRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? iRRAT->power.readOp + .power_gated_with_long_channel_leakage + : iRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << iRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "FP Retire RAT:" << endl; + cout << indent_str_next << "Area = " << fRRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fRRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
fRRAT->power.readOp.longer_channel_leakage + : fRRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? fRRAT->power.readOp + .power_gated_with_long_channel_leakage + : fRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + if (coredynp.scheu_ty == PhysicalRegFile) { + cout << indent_str << "FP Free List:" << endl; + cout << indent_str_next << "Area = " << ffreeL->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ffreeL->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ffreeL->power.readOp.longer_channel_leakage + : ffreeL->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? ffreeL->power.readOp + .power_gated_with_long_channel_leakage + : ffreeL->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ffreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else { + cout << indent_str << "Int DCL:" << endl; + cout << indent_str_next + << "Peak Dynamic = " << idcl->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? idcl->power.readOp.longer_channel_leakage + : idcl->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
idcl->power.readOp.power_gated_with_long_channel_leakage + : idcl->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << idcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << indent_str << "FP DCL:" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fdcl->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? fdcl->power.readOp.longer_channel_leakage + : fdcl->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? fdcl->power.readOp.power_gated_with_long_channel_leakage + : fdcl->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fdcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + } + } else { + if (coredynp.core_ty == OOO) { + cout << indent_str_next << "Int Front End RAT Peak Dynamic = " + << iFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " + << iFRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Int Front End RAT Gate Leakage = " + << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "FP Front End RAT Peak Dynamic = " + << fFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " + << fFRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Front End RAT Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Free List Peak Dynamic = " + << 
ifreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Free List Subthreshold Leakage = " + << ifreeL->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Free List Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + if (coredynp.scheu_ty == PhysicalRegFile) { + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + cout << indent_str_next << "Int Retire RAT Peak Dynamic = " + << iRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " + << iRRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Int Retire RAT Gate Leakage = " + << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "FP Retire RAT Peak Dynamic = " + << fRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " + << fRRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Retire RAT Gate Leakage = " + << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; + } + cout << indent_str_next << "FP Free List Peak Dynamic = " + << ffreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Free List Subthreshold Leakage = " + << ffreeL->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Free List Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + } + } else { + cout << indent_str_next << "Int DCL Peak Dynamic = " + << idcl->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int DCL Subthreshold Leakage = " + << idcl->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "FP DCL Peak Dynamic = " + << fdcl->rt_power.readOp.dynamic * clockRate << " W" << 
endl; + cout << indent_str_next << "FP DCL Subthreshold Leakage = " + << fdcl->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage + << " W" << endl; } + } +} - icache.power_t.reset(); - IB->power_t.reset(); -// ID_inst->power_t.reset(); -// ID_operand->power_t.reset(); -// ID_misc->power_t.reset(); - if (coredynp.predictionW>0) - { - BTB->power_t.reset(); +void SchedulerU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double ROB_duty_cycle; + // ROB_duty_cycle = ((coredynp.ALU_duty_cycle + + // coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 + // + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 + //? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 + // + + // coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; + ROB_duty_cycle = 1; + // init stats + if (is_tdp) { + if (coredynp.core_ty == OOO) { + int_inst_window->stats_t.readAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; + int_inst_window->stats_t.writeAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; + int_inst_window->stats_t.searchAc.access = + coredynp.issueW * coredynp.num_pipelines; + int_inst_window->tdp_stats = int_inst_window->stats_t; + fp_inst_window->stats_t.readAc.access = + fp_inst_window->l_ip.num_rd_ports * coredynp.num_fp_pipelines; + fp_inst_window->stats_t.writeAc.access = + fp_inst_window->l_ip.num_wr_ports * coredynp.num_fp_pipelines; + fp_inst_window->stats_t.searchAc.access = + fp_inst_window->l_ip.num_search_ports * coredynp.num_fp_pipelines; + fp_inst_window->tdp_stats = fp_inst_window->stats_t; + + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->stats_t.readAc.access = + coredynp.commitW * coredynp.num_pipelines * ROB_duty_cycle; + ROB->stats_t.writeAc.access = + coredynp.issueW * coredynp.num_pipelines * ROB_duty_cycle; + ROB->tdp_stats = ROB->stats_t; + + /* + * 
When inst commits, ROB must be read. + * Because for Physcial register based cores, physical register tag in + * ROB need to be read out and write into RRAT/CAM based RAT. For RS + * based cores, register content that stored in ROB must be read out and + * stored in architectural registers. + * + * if no-register is involved, the ROB read out operation when + * instruction commits can be ignored. assuming 20% insts. belong this + * type. + * TODO: ROB duty_cycle need to be revisited + */ + } + + } else if (coredynp.multithreaded) { + int_inst_window->stats_t.readAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; + int_inst_window->stats_t.writeAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; + int_inst_window->stats_t.searchAc.access = + coredynp.issueW * coredynp.num_pipelines; + int_inst_window->tdp_stats = int_inst_window->stats_t; } - icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+ - //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache - icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic + - icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic; - icache.power_t.readOp.dynamic += 
icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic; - - IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access + - IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic; - - if (coredynp.predictionW>0) - { - BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic; - - BPT->computeEnergy(is_tdp); - } - - if (is_tdp) - { -// icache.power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - icache.power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->power = IB->power_t + IB->local_result.power*pppm_lkg; - power = power + icache.power + IB->power; - if (coredynp.predictionW>0) - { - BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg; - power = power + BTB->power + BPT->power; - } - - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; - - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; - - power = power + (ID_inst->power + - ID_operand->power + - ID_misc->power); + } else { // rtp + if (coredynp.core_ty == OOO) { + int_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].inst_window_reads; + int_inst_window->stats_t.writeAc.access = + 
XML->sys.core[ithCore].inst_window_writes; + int_inst_window->stats_t.searchAc.access = + XML->sys.core[ithCore].inst_window_wakeup_accesses; + int_inst_window->rtp_stats = int_inst_window->stats_t; + fp_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].fp_inst_window_reads; + fp_inst_window->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_inst_window_writes; + fp_inst_window->stats_t.searchAc.access = + XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; + fp_inst_window->rtp_stats = fp_inst_window->stats_t; + + if (XML->sys.core[ithCore].ROB_size > 0) { + + ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; + ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; + /* ROB need to be updated in RS based OOO when new values are produced, + * this update may happen before the commit stage when ROB entry is + * released + * 1. ROB write at instruction inserted in + * 2. ROB write as results produced (for RS based OOO only) + * 3. ROB read as instruction committed. 
For RS based OOO, data values + * are read out and sent to ARF For Physical reg based OOO, no data + * stored in ROB, but register tags need to be read out and used to set + * the RRAT and to recycle the register tag to free list buffer + */ + ROB->rtp_stats = ROB->stats_t; + } + + } else if (coredynp.multithreaded) { + int_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions; + int_inst_window->stats_t.writeAc.access = + XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions; + int_inst_window->stats_t.searchAc.access = + 2 * (XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions); + int_inst_window->rtp_stats = int_inst_window->stats_t; } - else - { -// icache.rt_power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; - if (coredynp.predictionW>0) - { - BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; - } - - ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access; - ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; - - rt_power = rt_power + (ID_inst->rt_power + - ID_operand->rt_power + - ID_misc->rt_power); + } + + // computation engine + if (coredynp.core_ty == OOO) { + 
int_inst_window->power_t.reset(); + fp_inst_window->power_t.reset(); + + /* each instruction needs to write to scheduler, read out when all resources + * and source operands are ready two search ops with one for each source + * operand + * + */ + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->stats_t.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->stats_t.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->stats_t.writeAc.access + + int_inst_window->stats_t.readAc.access * + instruction_selection->power.readOp.dynamic; + + fp_inst_window->power_t.readOp.dynamic += + fp_inst_window->local_result.power.readOp.dynamic * + fp_inst_window->stats_t.readAc.access + + fp_inst_window->local_result.power.searchOp.dynamic * + fp_inst_window->stats_t.searchAc.access + + fp_inst_window->local_result.power.writeOp.dynamic * + fp_inst_window->stats_t.writeAc.access + + fp_inst_window->stats_t.writeAc.access * + instruction_selection->power.readOp.dynamic; + + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->power_t.reset(); + ROB->power_t.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * ROB->stats_t.readAc.access + + ROB->stats_t.writeAc.access * ROB->local_result.power.writeOp.dynamic; } -} -void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - - cout << indent_str<< "Instruction Cache:" << endl; - cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? icache.power.readOp.power_gated_with_long_channel_leakage : icache.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <0) - { - cout << indent_str<< "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? BTB->power.readOp.power_gated_with_long_channel_leakage : BTB->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <exist) - { - cout << indent_str<< "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
BPT->power.readOp.power_gated_with_long_channel_leakage : BPT->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3) - { - BPT->displayEnergy(indent+4, plevel, is_tdp); - } - } - } - cout << indent_str<< "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? IB->power.readOp.power_gated_with_long_channel_leakage : IB->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <power.readOp.leakage + ID_operand->power.readOp.leakage + ID_misc->power.readOp.leakage); - double tot_leakage_longchannel = (ID_inst->power.readOp.longer_channel_leakage + ID_operand->power.readOp.longer_channel_leakage + ID_misc->power.readOp.longer_channel_leakage); - double tot_leakage_pg = (ID_inst->power.readOp.power_gated_leakage + ID_operand->power.readOp.power_gated_leakage + ID_misc->power.readOp.power_gated_leakage); - double tot_leakage_pg_with_long_channel = (ID_inst->power.readOp.power_gated_with_long_channel_leakage + ID_operand->power.readOp.power_gated_with_long_channel_leakage + ID_misc->power.readOp.power_gated_with_long_channel_leakage); - - - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = 
" - << (long_channel ? tot_leakage_pg_with_long_channel : tot_leakage_pg) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage + - ID_operand->power.readOp.gate_leakage + - ID_misc->power.readOp.gate_leakage) << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic + - ID_operand->rt_power.readOp.dynamic + - ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl; - cout <power_t.reset(); + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->stats_t.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->stats_t.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->stats_t.writeAc.access + + int_inst_window->stats_t.writeAc.access * + instruction_selection->power.readOp.dynamic; + } + + // assign values + if (is_tdp) { + if (coredynp.core_ty == OOO) { + int_inst_window->power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + fp_inst_window->power = + fp_inst_window->power_t + + (fp_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + power = power + int_inst_window->power + fp_inst_window->power; + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->power = ROB->power_t + ROB->local_result.power * pppm_lkg; + power = power + ROB->power; + } + + } else if (coredynp.multithreaded) { + // set_pppm(pppm_t, + // XML->sys.core[ithCore].issue_width,1, 1, 1); + int_inst_window->power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + power = power + int_inst_window->power; + } + } else { // rtp + if (coredynp.core_ty == OOO) { + int_inst_window->rt_power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + 
fp_inst_window->rt_power = + fp_inst_window->power_t + + (fp_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + rt_power = + rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->rt_power = ROB->power_t + ROB->local_result.power * pppm_lkg; + rt_power = rt_power + ROB->rt_power; + } + + } else if (coredynp.multithreaded) { + // set_pppm(pppm_t, + // XML->sys.core[ithCore].issue_width,1, 1, 1); + int_inst_window->rt_power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + rt_power = rt_power + int_inst_window->rt_power; + } + } + // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); + // cout<<"Scheduler + // power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - } - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;; 
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - } - //Unified free list for both int and fp - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = 
coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - } - - } - else - {//init stats for Runtime Dynamic (RTP) - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - } - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = 
2*XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; -// iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs. - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; -// fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - } - //Unified free list for both int and fp since the ROB act as physcial registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = 
2*(XML->sys.core[ithCore].rename_writes + - XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group - //are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - } - - } - /* Compute engine */ - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - if ((coredynp.rm_ty ==RAMbased) && 
(coredynp.globalCheckpoint<1)) - { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - - iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic - +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic - +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); - } - - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic - +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic); - - } - else if (coredynp.scheu_ty==ReservationStation) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - 
+fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - - iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic - +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic - +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); - } - - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - } - - } - else - { - if (coredynp.issueW>1) - { - idcl->power_t.reset(); - fdcl->power_t.reset(); - set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; - } - - } - - //assign value to tpd and rtp - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power ; - ffreeL->power = ffreeL->power_t + ffreeL->local_result.power ; - power = power + (iFRAT->power + fFRAT->power) - //+ (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; - power = power + 
(iRRAT->power + fRRAT->power); - } - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power ; - power = power + (iFRAT->power + fFRAT->power) - + ifreeL->power; - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power ; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power ; - power = power + (iRRAT->power + fRRAT->power); - } - } - } - else - { - power = power + idcl->power_t + fdcl->power_t; - } - - } - else - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) + fdcl->power_t; - - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power ; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power ; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - // + (iRRAT->rt_power + fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); - - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power ; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power ; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); - } - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power ; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + ifreeL->rt_power; - if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1)) - { - iRRAT->rt_power = 
iRRAT->power_t + iRRAT->local_result.power ; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power ; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); - } - } - } - else - { - rt_power = rt_power + idcl->power_t + fdcl->power_t; - } - - } +void SchedulerU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + if (coredynp.core_ty == OOO) { + cout << indent_str << "Instruction Window:" << endl; + cout << indent_str_next + << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? int_inst_window->power.readOp.longer_channel_leakage + : int_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? int_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : int_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "FP Instruction Window:" << endl; + cout << indent_str_next + << "Area = " << fp_inst_window->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << fp_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? 
fp_inst_window->power.readOp.longer_channel_leakage + : fp_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? fp_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : fp_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fp_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + if (XML->sys.core[ithCore].ROB_size > 0) { + cout << indent_str << "ROB:" << endl; + cout << indent_str_next << "Area = " << ROB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ROB->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ROB->power.readOp.longer_channel_leakage + : ROB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? ROB->power.readOp.power_gated_with_long_channel_leakage + : ROB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ROB->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else if (coredynp.multithreaded) { + cout << indent_str << "Instruction Window:" << endl; + cout << indent_str_next + << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? 
int_inst_window->power.readOp.longer_channel_leakage + : int_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? int_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : int_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + } + } else { + if (coredynp.core_ty == OOO) { + cout << indent_str_next << "Instruction Window Peak Dynamic = " + << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Instruction Window Subthreshold Leakage = " + << int_inst_window->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Instruction Window Gate Leakage = " + << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "FP Instruction Window Peak Dynamic = " + << fp_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next + << "FP Instruction Window Subthreshold Leakage = " + << fp_inst_window->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Instruction Window Gate Leakage = " + << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + if (XML->sys.core[ithCore].ROB_size > 0) { + cout << indent_str_next << "ROB Peak Dynamic = " + << ROB->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage + << " W" << endl; + } + } else if (coredynp.multithreaded) { + cout << indent_str_next << "Instruction Window Peak 
Dynamic = " + << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Instruction Window Subthreshold Leakage = " + << int_inst_window->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Instruction Window Gate Leakage = " + << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + } + } } -void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - - if (coredynp.core_ty==OOO) - { - cout << indent_str<< "Int Front End RAT with " << coredynp.globalCheckpoint <<" internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
iFRAT->power.readOp.power_gated_with_long_channel_leakage : iFRAT->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <stats_t.readAc.access = + 0.67 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches->stats_t.readAc.miss = 0; + dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - + dcache.caches->stats_t.readAc.miss; + dcache.caches->stats_t.writeAc.access = + 0.33 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches->stats_t.writeAc.miss = 0; + dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - + dcache.caches->stats_t.writeAc.miss; + dcache.caches->tdp_stats = dcache.caches->stats_t; + + dcache.missb->stats_t.readAc.access = + dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb->stats_t.writeAc.access = + dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb->tdp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = + dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb->stats_t.writeAc.access = + dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb->tdp_stats = dcache.ifb->stats_t; + + dcache.prefetchb->stats_t.readAc.access = + dcache.prefetchb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb->stats_t.writeAc.access = + dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; + if (cache_p == Write_back) { + dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; + dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; + dcache.wbb->tdp_stats = dcache.wbb->stats_t; + } -} + LSQ->stats_t.readAc.access = 
LSQ->stats_t.writeAc.access = + LSQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LSQ->tdp_stats = LSQ->stats_t; + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = + LoadQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LoadQ->tdp_stats = LoadQ->stats_t; + } + } else { + // init stats for Runtime Dynamic (RTP) + dcache.caches->stats_t.readAc.access = + XML->sys.core[ithCore].dcache.read_accesses; + dcache.caches->stats_t.readAc.miss = + XML->sys.core[ithCore].dcache.read_misses; + dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - + dcache.caches->stats_t.readAc.miss; + dcache.caches->stats_t.writeAc.access = + XML->sys.core[ithCore].dcache.write_accesses; + dcache.caches->stats_t.writeAc.miss = + XML->sys.core[ithCore].dcache.write_misses; + dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - + dcache.caches->stats_t.writeAc.miss; + dcache.caches->rtp_stats = dcache.caches->stats_t; + + if (cache_p == Write_back) { + dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.missb->stats_t.writeAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.missb->rtp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.ifb->rtp_stats = dcache.ifb->stats_t; + + dcache.prefetchb->stats_t.readAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.prefetchb->stats_t.writeAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; + + dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.wbb->rtp_stats = dcache.wbb->stats_t; + } else { + dcache.missb->stats_t.readAc.access = 
dcache.caches->stats_t.readAc.miss; + dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; + dcache.missb->rtp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; + dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; + dcache.ifb->rtp_stats = dcache.ifb->stats_t; + + dcache.prefetchb->stats_t.readAc.access = + dcache.caches->stats_t.readAc.miss; + dcache.prefetchb->stats_t.writeAc.access = + dcache.caches->stats_t.readAc.miss; + dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; + } + LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions) * + 2; // flush overhead considered + LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions) * + 2; + LSQ->rtp_stats = LSQ->stats_t; + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions; + LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions; + LoadQ->rtp_stats = LoadQ->stats_t; + } + } -void SchedulerU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double ROB_duty_cycle; -// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? 
(coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; - ROB_duty_cycle = 1; - //init stats - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; - - /* - * When inst commits, ROB must be read. - * Because for Physcial register based cores, physical register tag in ROB - * need to be read out and write into RRAT/CAM based RAT. - * For RS based cores, register content that stored in ROB must be - * read out and stored in architectural registers. - * - * if no-register is involved, the ROB read out operation when instruction commits can be ignored. - * assuming 20% insts. belong this type. 
- * TODO: ROB duty_cycle need to be revisited - */ - } - - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - } - - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; - ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; - /* ROB need to be updated in RS based OOO when new values are produced, - * this update may happen before the commit stage when ROB entry is released - * 1. ROB write at instruction inserted in - * 2. ROB write as results produced (for RS based OOO only) - * 3. ROB read as instruction committed. 
For RS based OOO, data values are read out and sent to ARF - * For Physical reg based OOO, no data stored in ROB, but register tags need to be - * read out and used to set the RRAT and to recycle the register tag to free list buffer - */ - ROB->rtp_stats = ROB->stats_t; - } - - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; - } + dcache.power_t.reset(); + LSQ->power_t.reset(); + dcache.power_t.readOp.dynamic += + (dcache.caches->stats_t.readAc.hit * + dcache.caches->local_result.power.readOp.dynamic + + dcache.caches->stats_t.readAc.miss * + dcache.caches->local_result.power.readOp + .dynamic + // assuming D cache is in the fast model which read + // tag and data together + dcache.caches->stats_t.writeAc.miss * + dcache.caches->local_result.tag_array2->power.readOp.dynamic + + dcache.caches->stats_t.writeAc.access * + dcache.caches->local_result.power.writeOp.dynamic); + + if (cache_p == Write_back) { // write miss will generate a write later + dcache.power_t.readOp.dynamic += + dcache.caches->stats_t.writeAc.miss * + dcache.caches->local_result.power.writeOp.dynamic; + } + + dcache.power_t.readOp.dynamic += + dcache.missb->stats_t.readAc.access * + dcache.missb->local_result.power.searchOp.dynamic + + dcache.missb->stats_t.writeAc.access * + dcache.missb->local_result.power.writeOp + .dynamic; // each access to missb involves a CAM and a write + dcache.power_t.readOp.dynamic += + dcache.ifb->stats_t.readAc.access * + dcache.ifb->local_result.power.searchOp.dynamic + + dcache.ifb->stats_t.writeAc.access * + 
dcache.ifb->local_result.power.writeOp.dynamic; + dcache.power_t.readOp.dynamic += + dcache.prefetchb->stats_t.readAc.access * + dcache.prefetchb->local_result.power.searchOp.dynamic + + dcache.prefetchb->stats_t.writeAc.access * + dcache.prefetchb->local_result.power.writeOp.dynamic; + if (cache_p == Write_back) { + dcache.power_t.readOp.dynamic += + dcache.wbb->stats_t.readAc.access * + dcache.wbb->local_result.power.searchOp.dynamic + + dcache.wbb->stats_t.writeAc.access * + dcache.wbb->local_result.power.writeOp.dynamic; + } + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->power_t.reset(); + LoadQ->power_t.readOp.dynamic += + LoadQ->stats_t.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->stats_t.writeAc.access * + LoadQ->local_result.power.writeOp + .dynamic; // every memory access invloves at least two + // operations on LoadQ + + LSQ->power_t.readOp.dynamic += + LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->stats_t.writeAc.access * + LSQ->local_result.power.writeOp + .dynamic; // every memory access invloves at least two + // operations on LSQ + + } else { + LSQ->power_t.readOp.dynamic += + LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->stats_t.writeAc.access * + LSQ->local_result.power.writeOp + .dynamic; // every memory access invloves at least two + // operations on LSQ + } + + if (is_tdp) { + // dcache.power = dcache.power_t + + // (dcache.caches->local_result.power)*pppm_lkg + + // (dcache.missb->local_result.power + + // dcache.ifb->local_result.power + + // dcache.prefetchb->local_result.power + + // dcache.wbb->local_result.power)*pppm_Isub; + dcache.power = dcache.power_t + (dcache.caches->local_result.power + + dcache.missb->local_result.power + + dcache.ifb->local_result.power + 
+ dcache.prefetchb->local_result.power) * + pppm_lkg; + if (cache_p == Write_back) { + dcache.power = dcache.power + dcache.wbb->local_result.power * pppm_lkg; } - //computation engine - if (coredynp.core_ty==OOO) - { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); - - /* each instruction needs to write to scheduler, read out when all resources and source operands are ready - * two search ops with one for each source operand - * - */ - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic; - - fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access - + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access - + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access - + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic; - } - - - - - } - else if (coredynp.multithreaded) - { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + 
int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - } - - //assign values - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg; - power = power + ROB->power; - } - - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power; - } - - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg; - rt_power = rt_power + ROB->rt_power; - } - - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; - } + LSQ->power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; + power = power + dcache.power + LSQ->power; + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + 
LoadQ->power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; + power = power + LoadQ->power; + } + } else { + // dcache.rt_power = dcache.power_t + + // (dcache.caches->local_result.power + + // dcache.missb->local_result.power + // + dcache.ifb->local_result.power + + // dcache.prefetchb->local_result.power + + // dcache.wbb->local_result.power)*pppm_lkg; + dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + + dcache.missb->local_result.power + + dcache.ifb->local_result.power + + dcache.prefetchb->local_result.power) * + pppm_lkg; + + if (cache_p == Write_back) { + dcache.rt_power = + dcache.rt_power + dcache.wbb->local_result.power * pppm_lkg; } -// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); -// cout<<"Scheduler power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
int_inst_window->power.readOp.power_gated_with_long_channel_leakage : int_inst_window->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].ROB_size >0) - { - cout << indent_str<<"ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? ROB->power.readOp.power_gated_with_long_channel_leakage : ROB->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].ROB_size >0) - { - cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl; - } - } - else if (coredynp.multithreaded) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << 
int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - } - } + LSQ->rt_power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; + rt_power = rt_power + dcache.rt_power + LSQ->rt_power; + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; + rt_power = rt_power + LoadQ->rt_power; + } + } } -void LoadStoreU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; - if (cache_p==Write_back) - { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - 
dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; - } - - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; - - if (cache_p==Write_back) - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = 
dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; - } - else - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - } - - LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2; - LSQ->rtp_stats = LSQ->stats_t; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; - } - - } - - dcache.power_t.reset(); - LSQ->power_t.reset(); - dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+ //assuming D cache is in the fast model which read tag and data together - dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic); - - if (cache_p==Write_back) - {//write miss will generate a write later 
- dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic; - } +void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Data Cache:" << endl; + cout << indent_str_next << "Area = " << dcache.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << dcache.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? dcache.power.readOp.longer_channel_leakage + : dcache.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? dcache.power.readOp.power_gated_with_long_channel_leakage + : dcache.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << dcache.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (coredynp.core_ty == Inorder) { + cout << indent_str << "Load/Store Queue:" << endl; + cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LSQ->power.readOp.longer_channel_leakage + : LSQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
LSQ->power.readOp.power_gated_with_long_channel_leakage + : LSQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else - dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic; - dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic; - if (cache_p==Write_back) { - dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic - + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic; + if (XML->sys.core[ithCore].load_buffer_size > 0) { + cout << indent_str << "LoadQ:" << endl; + cout << indent_str_next << "Area = " << LoadQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LoadQ->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LoadQ->power.readOp.longer_channel_leakage + : LoadQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
LoadQ->power.readOp + .power_gated_with_long_channel_leakage + : LoadQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LoadQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + cout << indent_str << "StoreQ:" << endl; + cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LSQ->power.readOp.longer_channel_leakage + : LSQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? LSQ->power.readOp.power_gated_with_long_channel_leakage + : LSQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else { + cout << indent_str_next << "Data Cache Peak Dynamic = " + << dcache.rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Data Cache Subthreshold Leakage = " + << dcache.rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Data Cache Gate Leakage = " + << dcache.rt_power.readOp.gate_leakage << " W" << endl; + if (coredynp.core_ty == Inorder) { + cout << indent_str_next << "Load/Store Queue Peak Dynamic = " + << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " + << LSQ->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Load/Store Queue Gate Leakage = " + << 
LSQ->rt_power.readOp.gate_leakage << " W" << endl; + } else { + cout << indent_str_next << "LoadQ Peak Dynamic = " + << LoadQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "LoadQ Subthreshold Leakage = " + << LoadQ->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "StoreQ Peak Dynamic = " + << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage + << " W" << endl; } + } +} - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+ - LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ +void MemManU::computeEnergy(bool is_tdp) { + + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + itlb->stats_t.readAc.access = + itlb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + itlb->stats_t.readAc.miss = 0; + itlb->stats_t.readAc.hit = + itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; + itlb->tdp_stats = itlb->stats_t; + + dtlb->stats_t.readAc.access = + dtlb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dtlb->stats_t.readAc.miss = 0; + dtlb->stats_t.readAc.hit = + dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; + dtlb->tdp_stats = dtlb->stats_t; + } else { + // init stats for Runtime Dynamic (RTP) + itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; + itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; + itlb->stats_t.readAc.hit = + 
itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; + itlb->rtp_stats = itlb->stats_t; + + dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; + dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; + dtlb->stats_t.readAc.hit = + dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; + dtlb->rtp_stats = dtlb->stats_t; + } - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ + itlb->power_t.reset(); + dtlb->power_t.reset(); + itlb->power_t.readOp.dynamic += + itlb->stats_t.readAc.access * itlb->local_result.power.searchOp + .dynamic // FA spent most power in tag, + // so use total access not hits + + itlb->stats_t.readAc.miss * itlb->local_result.power.writeOp.dynamic; + dtlb->power_t.readOp.dynamic += + dtlb->stats_t.readAc.access * dtlb->local_result.power.searchOp + .dynamic // FA spent most power in tag, + // so use total access not hits + + dtlb->stats_t.readAc.miss * dtlb->local_result.power.writeOp.dynamic; + + if (is_tdp) { + itlb->power = itlb->power_t + itlb->local_result.power * pppm_lkg; + dtlb->power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; + power = power + itlb->power + dtlb->power; + } else { + itlb->rt_power = itlb->power_t + itlb->local_result.power * pppm_lkg; + dtlb->rt_power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; + rt_power = rt_power + itlb->rt_power + dtlb->rt_power; + } +} +void MemManU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Itlb:" << endl; + cout << indent_str_next << "Area = " << 
itlb->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << itlb->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? itlb->power.readOp.longer_channel_leakage + : itlb->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? itlb->power.readOp.power_gated_with_long_channel_leakage + : itlb->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << itlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Dtlb:" << endl; + cout << indent_str_next << "Area = " << dtlb->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << dtlb->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? dtlb->power.readOp.longer_channel_leakage + : dtlb->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
dtlb->power.readOp.power_gated_with_long_channel_leakage + : dtlb->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << dtlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else { + cout << indent_str_next << "Itlb Peak Dynamic = " + << itlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Dtlb Peak Dynamic = " + << dtlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage + << " W" << endl; + } +} + +void RegFU::computeEnergy(bool is_tdp) { + /* + * Architecture RF and physical RF cannot be present at the same time. + * Therefore, the RF stats can only refer to either ARF or PRF; + * And the same stats can be used for both. + */ + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + IRF->stats_t.readAc.access = + coredynp.issueW * 2 * + (coredynp.ALU_duty_cycle * 1.1 + + (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * + coredynp.num_pipelines; + IRF->stats_t.writeAc.access = + coredynp.issueW * + (coredynp.ALU_duty_cycle * 1.1 + + (coredynp.num_muls > 0 ? 
coredynp.MUL_duty_cycle : 0)) * + coredynp.num_pipelines; + // Rule of Thumb: about 10% RF related instructions do not need to access + // ALUs + IRF->tdp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports * + coredynp.FPU_duty_cycle * 1.05 * + coredynp.num_fp_pipelines; + FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports * + coredynp.FPU_duty_cycle * 1.05 * + coredynp.num_fp_pipelines; + FRF->tdp_stats = FRF->stats_t; + if (coredynp.regWindowing) { + RFWIN->stats_t.readAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; + RFWIN->stats_t.writeAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; + RFWIN->tdp_stats = RFWIN->stats_t; } - else - { - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ + } else { + // init stats for Runtime Dynamic (RTP) + IRF->stats_t.readAc.access = + XML->sys.core[ithCore] + .int_regfile_reads; // TODO: no diff on archi and phy + IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; + IRF->rtp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; + FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; + FRF->rtp_stats = FRF->stats_t; + if (coredynp.regWindowing) { + RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls * 16; + RFWIN->stats_t.writeAc.access = + XML->sys.core[ithCore].function_calls * 16; + RFWIN->rtp_stats = RFWIN->stats_t; + + IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + + XML->sys.core[ithCore].function_calls * 16; + IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + + XML->sys.core[ithCore].function_calls * 16; + IRF->rtp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + + 
XML->sys.core[ithCore].function_calls * 16; + ; + FRF->stats_t.writeAc.access = + XML->sys.core[ithCore].float_regfile_writes + + XML->sys.core[ithCore].function_calls * 16; + ; + FRF->rtp_stats = FRF->stats_t; + } + } + IRF->power_t.reset(); + FRF->power_t.reset(); + IRF->power_t.readOp.dynamic += + (IRF->stats_t.readAc.access * IRF->local_result.power.readOp.dynamic + + IRF->stats_t.writeAc.access * IRF->local_result.power.writeOp.dynamic); + FRF->power_t.readOp.dynamic += + (FRF->stats_t.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->stats_t.writeAc.access * FRF->local_result.power.writeOp.dynamic); + if (coredynp.regWindowing) { + RFWIN->power_t.reset(); + RFWIN->power_t.readOp.dynamic += + (RFWIN->stats_t.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->stats_t.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic); + } + if (is_tdp) { + IRF->power = IRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) + : IRF->local_result.power); + FRF->power = FRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (FRF->local_result.power * coredynp.pppm_lkg_multhread) + : FRF->local_result.power); + power = power + (IRF->power + FRF->power); + if (coredynp.regWindowing) { + RFWIN->power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; + power = power + RFWIN->power; + } + } else { + IRF->rt_power = + IRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) + : IRF->local_result.power); + FRF->rt_power = + FRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? 
(FRF->local_result.power * coredynp.pppm_lkg_multhread) + : FRF->local_result.power); + rt_power = rt_power + (IRF->power_t + FRF->power_t); + if (coredynp.regWindowing) { + RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; + rt_power = rt_power + RFWIN->rt_power; } + } +} - if (is_tdp) - { -// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg + -// (dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) *pppm_lkg; - if (cache_p==Write_back) - { - dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg; - } - - LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - power = power + dcache.power + LSQ->power; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - power = power + LoadQ->power; - } +void RegFU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Integer RF:" << endl; + cout << indent_str_next << "Area = " << IRF->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << IRF->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? IRF->power.readOp.longer_channel_leakage + : IRF->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
IRF->power.readOp.power_gated_with_long_channel_leakage + : IRF->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + cout << indent_str << "Floating Point RF:" << endl; + cout << indent_str_next << "Area = " << FRF->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << FRF->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? FRF->power.readOp.longer_channel_leakage + : FRF->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? FRF->power.readOp.power_gated_with_long_channel_leakage + : FRF->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (coredynp.regWindowing) { + cout << indent_str << "Register Windows:" << endl; + cout << indent_str_next << "Area = " << RFWIN->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << RFWIN->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? RFWIN->power.readOp.longer_channel_leakage + : RFWIN->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
RFWIN->power.readOp.power_gated_with_long_channel_leakage + : RFWIN->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << RFWIN->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; } - else - { -// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + -// dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power )*pppm_lkg; - - if (cache_p==Write_back) - { - dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg; - } - - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - rt_power = rt_power + dcache.rt_power + LSQ->rt_power; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; - } + } else { + cout << indent_str_next << "Integer RF Peak Dynamic = " + << IRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Integer RF Subthreshold Leakage = " + << IRF->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Floating Point RF Peak Dynamic = " + << FRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " + << FRF->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Floating Point RF Gate Leakage = " + << FRF->rt_power.readOp.gate_leakage << " W" << endl; + 
if (coredynp.regWindowing) { + cout << indent_str_next << "Register Windows Peak Dynamic = " + << RFWIN->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Register Windows Subthreshold Leakage = " + << RFWIN->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Register Windows Gate Leakage = " + << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; } + } } +void EXECU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double pppm_t[4] = {1, 1, 1, 1}; + // rfu->power.reset(); + // rfu->rt_power.reset(); + // scheu->power.reset(); + // scheu->rt_power.reset(); + // exeu->power.reset(); + // exeu->rt_power.reset(); + + rfu->computeEnergy(is_tdp); + scheu->computeEnergy(is_tdp); + exeu->computeEnergy(is_tdp); + if (coredynp.num_fpus > 0) { + fp_u->computeEnergy(is_tdp); + } + if (coredynp.num_muls > 0) { + mul->computeEnergy(is_tdp); + } + + if (is_tdp) { + set_pppm( + pppm_t, 2 * coredynp.ALU_cdb_duty_cycle, 2, 2, + 2 * coredynp + .ALU_cdb_duty_cycle); // 2 means two source operands needs to be + // passed for each int instruction. + bypass.power = bypass.power + intTagBypass->power * pppm_t + + int_bypass->power * pppm_t; + if (coredynp.num_muls > 0) { + set_pppm( + pppm_t, 2 * coredynp.MUL_cdb_duty_cycle, 2, 2, + 2 * coredynp + .MUL_cdb_duty_cycle); // 2 means two source operands needs to + // be passed for each int instruction. + bypass.power = bypass.power + intTag_mul_Bypass->power * pppm_t + + int_mul_bypass->power * pppm_t; + power = power + mul->power; + } + if (coredynp.num_fpus > 0) { + set_pppm( + pppm_t, 3 * coredynp.FPU_cdb_duty_cycle, 3, 3, + 3 * coredynp + .FPU_cdb_duty_cycle); // 3 means three source operands needs + // to be passed for each fp instruction. 
+ bypass.power = bypass.power + fp_bypass->power * pppm_t + + fpTagBypass->power * pppm_t; + power = power + fp_u->power; + } -void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << indent_str << "Data Cache:" << endl; - cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? dcache.power.readOp.power_gated_with_long_channel_leakage : dcache.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].load_buffer_size >0) - { - cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
LoadQ->power.readOp.power_gated_with_long_channel_leakage : LoadQ->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <power + exeu->power + bypass.power + scheu->power; + } else { + set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, + XML->sys.core[ithCore].cdb_alu_accesses); + bypass.rt_power = bypass.rt_power + intTagBypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + int_bypass->power * pppm_t; + + if (coredynp.num_muls > 0) { + set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, + XML->sys.core[ithCore] + .cdb_mul_accesses); // 2 means two source operands needs to + // be passed for each int instruction. + bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power * pppm_t + + int_mul_bypass->power * pppm_t; + rt_power = rt_power + mul->rt_power; + } + if (coredynp.num_fpus > 0) { + set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, + XML->sys.core[ithCore].cdb_fpu_accesses); + bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; + rt_power = rt_power + fp_u->rt_power; + } + rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + + scheu->rt_power; + } } -void MemManU::computeEnergy(bool is_tdp) -{ +void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + // cout << indent_str_next << "Results Broadcast Bus Area = " << + // bypass->area.get_area() *1e-6 << " mm^2" << endl; + if (is_tdp) { + cout << indent_str << "Register Files:" << endl; + cout << indent_str_next << "Area = " << 
rfu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << rfu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? rfu->power.readOp.longer_channel_leakage + : rfu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? rfu->power.readOp.power_gated_with_long_channel_leakage + : rfu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (plevel > 3) { + rfu->displayEnergy(indent + 4, is_tdp); + } + cout << indent_str << "Instruction Scheduler:" << endl; + cout << indent_str_next << "Area = " << scheu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << scheu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? scheu->power.readOp.longer_channel_leakage + : scheu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
scheu->power.readOp.power_gated_with_long_channel_leakage + : scheu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << scheu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 3) { + scheu->displayEnergy(indent + 4, is_tdp); + } + exeu->displayEnergy(indent, is_tdp); + if (coredynp.num_fpus > 0) { + fp_u->displayEnergy(indent, is_tdp); + } + if (coredynp.num_muls > 0) { + mul->displayEnergy(indent, is_tdp); + } + cout << indent_str << "Results Broadcast Bus:" << endl; + cout << indent_str_next + << "Area Overhead = " << bypass.area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << bypass.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? bypass.power.readOp.longer_channel_leakage + : bypass.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
bypass.power.readOp.power_gated_with_long_channel_leakage + : bypass.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << bypass.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else { + cout << indent_str_next << "Register Files Peak Dynamic = " + << rfu->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Register Files Subthreshold Leakage = " + << rfu->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Register Files Gate Leakage = " + << rfu->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " + << scheu->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " + << scheu->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Gate Leakage = " + << scheu->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " + << bypass.rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " + << bypass.rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " + << bypass.rt_power.readOp.gate_leakage << " W" << endl; + } +} - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports*coredynp.IFU_duty_cycle; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = 
dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; - } - else - { - //init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; +void Core::computeEnergy(bool is_tdp) { + /* + * When computing TDP, power = energy_per_cycle (the value computed in this + * function) * clock_rate (in the display_energy function) When computing + * dyn_power; power = total energy (the value computed in this function) / + * Total execution time (cycle count / clock rate) + */ + // power_point_product_masks + double pppm_t[4] = {1, 1, 1, 1}; + double rtp_pipeline_coe; + double num_units = 4.0; + if (is_tdp) { + ifu->computeEnergy(is_tdp); + lsu->computeEnergy(is_tdp); + mmu->computeEnergy(is_tdp); + exu->computeEnergy(is_tdp); + + if (coredynp.core_ty == OOO) { + num_units = 5.0; + rnu->computeEnergy(is_tdp); + set_pppm( + pppm_t, coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / + num_units); // User need to feed a duty cycle to improve accuracy + if (rnu->exist) { + rnu->power = rnu->power + corepipe->power * pppm_t; + power = power + rnu->power; + } + } + + if (ifu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.IFU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + // cout << "IFU = " << + // ifu->power.readOp.dynamic*clockRate << " W" << 
endl; + ifu->power = ifu->power + corepipe->power * pppm_t; + // cout << "IFU = " << + // ifu->power.readOp.dynamic*clockRate << " W" << endl; + // cout << "1/4 pipe = " << + // corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; + power = power + ifu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (lsu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.LSU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + lsu->power = lsu->power + corepipe->power * pppm_t; + // cout << "LSU = " << + // lsu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + lsu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (exu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.ALU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + exu->power = exu->power + corepipe->power * pppm_t; + // cout << "EXE = " << + // exu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + exu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (mmu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * + (0.5 + 0.5 * coredynp.LSU_duty_cycle), + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + mmu->power = mmu->power + corepipe->power * pppm_t; + // cout << "MMU = " << + // mmu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + mmu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + + power = power + undiffCore->power; + + if (XML->sys.Private_L2) { + + l2cache->computeEnergy(is_tdp); + set_pppm(pppm_t, l2cache->cachep.clockRate / clockRate, 1, 1, 1); + // l2cache->power = l2cache->power*pppm_t; + 
power = power + l2cache->power * pppm_t; + } + + } else { + ifu->computeEnergy(is_tdp); + lsu->computeEnergy(is_tdp); + mmu->computeEnergy(is_tdp); + exu->computeEnergy(is_tdp); + + if (coredynp.core_ty == OOO) { + num_units = 5.0; + rnu->computeEnergy(is_tdp); + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + XML->sys.total_cycles * XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + if (rnu->exist) { + rnu->rt_power = rnu->rt_power + corepipe->power * pppm_t; + + rt_power = rt_power + rnu->rt_power; + } + } else { + num_units = 4.0; + } + + if (ifu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.IFU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.IFU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + ifu->rt_power = ifu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + ifu->rt_power; + } + if (lsu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.LSU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.LSU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + + lsu->rt_power = lsu->rt_power + 
corepipe->power * pppm_t; + rt_power = rt_power + lsu->rt_power; + } + if (exu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.ALU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.ALU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + exu->rt_power = exu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + exu->rt_power; } + if (mmu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + (0.5 + 0.5 * coredynp.LSU_duty_cycle) * + XML->sys.total_cycles * XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + (0.5 + 0.5 * coredynp.LSU_duty_cycle) * + coredynp.total_cycles; + } + set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + mmu->rt_power = mmu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + mmu->rt_power; + } + + rt_power = rt_power + undiffCore->power; + // cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" + //<< endl; + if (XML->sys.Private_L2) { - itlb->power_t.reset(); - dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - 
itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - power = power + itlb->power + dtlb->power; - } - else - { - itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; - } + l2cache->computeEnergy(is_tdp); + // set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); + // l2cache->rt_power = l2cache->rt_power*pppm_t; + rt_power = rt_power + l2cache->rt_power; + } + } } -void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - - - if (is_tdp) - { - cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
itlb->power.readOp.power_gated_with_long_channel_leakage : itlb->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << "Core:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (ifu->exist) { + cout << indent_str << "Instruction Fetch Unit:" << endl; + cout << indent_str_next << "Area = " << ifu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ifu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ifu->power.readOp.longer_channel_leakage + : ifu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
ifu->power.readOp.power_gated_with_long_channel_leakage + : ifu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ifu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + ifu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (coredynp.core_ty == OOO) { + if (rnu->exist) { + cout << indent_str << "Renaming Unit:" << endl; + cout << indent_str_next << "Area = " << rnu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << rnu->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? rnu->power.readOp.longer_channel_leakage + : rnu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? rnu->power.readOp.power_gated_with_long_channel_leakage + : rnu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << rnu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + rnu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + } + if (lsu->exist) { + cout << indent_str << "Load Store Unit:" << endl; + cout << indent_str_next << "Area = " << lsu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << lsu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
lsu->power.readOp.longer_channel_leakage + : lsu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? lsu->power.readOp.power_gated_with_long_channel_leakage + : lsu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << lsu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + lsu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (mmu->exist) { + cout << indent_str << "Memory Management Unit:" << endl; + cout << indent_str_next << "Area = " << mmu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << mmu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? mmu->power.readOp.longer_channel_leakage + : mmu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? mmu->power.readOp.power_gated_with_long_channel_leakage + : mmu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << mmu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + mmu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (exu->exist) { + cout << indent_str << "Execution Unit:" << endl; + cout << indent_str_next << "Area = " << exu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << exu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
exu->power.readOp.longer_channel_leakage + : exu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? exu->power.readOp.power_gated_with_long_channel_leakage + : exu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << exu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + exu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + // if (plevel >2) + // { + // if (undiffCore->exist) + // { + // cout << indent_str << "Undifferentiated Core" << + // endl; cout << indent_str_next << "Area = " << + // undiffCore->area.get_area()*1e-6<< " mm^2" << endl; cout + // << indent_str_next << "Peak Dynamic = " << + // undiffCore->power.readOp.dynamic*clockRate << " W" << endl; + //// cout << indent_str_next << "Subthreshold Leakage = " + ///<< undiffCore->power.readOp.leakage <<" W" << endl; + // cout << indent_str_next << "Subthreshold Leakage + //= + //" + // << + //(long_channel? 
+ // undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) + //<< " W" << endl; cout << indent_str_next << "Gate Leakage = " + //<< undiffCore->power.readOp.gate_leakage << " W" << endl; + // // cout << indent_str_next << "Runtime Dynamic = " + //<< undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; + // cout + //<sys.Private_L2) { + + l2cache->displayEnergy(4, is_tdp); + } + } else { + // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = + //" + //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " + // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << + // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next + //<< "Load Store Unit Peak Dynamic = " << + // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << + // lsu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Load Store Unit Gate Leakage = " << + // lsu->rt_power.readOp.gate_leakage + //<< " W" << endl; cout << indent_str_next << "Memory Management Unit + // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << + // endl; cout << indent_str_next << "Memory Management Unit Subthreshold + // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout + // << indent_str_next << "Memory Management Unit Gate Leakage = " << + // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Execution Unit Peak Dynamic = " << + // exu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Execution Unit Subthreshold Leakage = " << + // exu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Execution Unit Gate Leakage = " << + // exu->rt_power.readOp.gate_leakage 
+ //<< " W" << endl; + } } +InstFetchU ::~InstFetchU() { -void RegFU::computeEnergy(bool is_tdp) -{ -/* - * Architecture RF and physical RF cannot be present at the same time. - * Therefore, the RF stats can only refer to either ARF or PRF; - * And the same stats can be used for both. - */ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - //Rule of Thumb: about 10% RF related instructions do not need to access ALUs - IRF->tdp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->rtp_stats = RFWIN->stats_t; - - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + - 
XML->sys.core[ithCore].function_calls*16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + - XML->sys.core[ithCore].function_calls*16; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + - XML->sys.core[ithCore].function_calls*16;; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+ - XML->sys.core[ithCore].function_calls*16;; - FRF->rtp_stats = FRF->stats_t; - } + if (!exist) + return; + if (IB) { + delete IB; + IB = 0; + } + if (ID_inst) { + delete ID_inst; + ID_inst = 0; + } + if (ID_operand) { + delete ID_operand; + ID_operand = 0; + } + if (ID_misc) { + delete ID_misc; + ID_misc = 0; + } + if (coredynp.predictionW > 0) { + if (BTB) { + delete BTB; + BTB = 0; + } + if (BPT) { + delete BPT; + BPT = 0; } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic - +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic - +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic); - if (coredynp.regWindowing) - { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - IRF->power = IRF->power_t + ((coredynp.scheu_ty==ReservationStation) ? (IRF->local_result.power *coredynp.pppm_lkg_multhread):IRF->local_result.power); - FRF->power = FRF->power_t + ((coredynp.scheu_ty==ReservationStation) ? 
(FRF->local_result.power *coredynp.pppm_lkg_multhread):FRF->local_result.power); - power = power + (IRF->power + FRF->power); - if (coredynp.regWindowing) - { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - power = power + RFWIN->power; - } - } - else - { - IRF->rt_power = IRF->power_t + ((coredynp.scheu_ty==ReservationStation) ? (IRF->local_result.power *coredynp.pppm_lkg_multhread):IRF->local_result.power); - FRF->rt_power = FRF->power_t + ((coredynp.scheu_ty==ReservationStation) ? (FRF->local_result.power *coredynp.pppm_lkg_multhread):FRF->local_result.power); - rt_power = rt_power + (IRF->power_t + FRF->power_t); - if (coredynp.regWindowing) - { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; - } - } + } } +BranchPredictor ::~BranchPredictor() { -void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
IRF->power.readOp.power_gated_with_long_channel_leakage : IRF->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <power.reset(); -// rfu->rt_power.reset(); -// scheu->power.reset(); -// scheu->rt_power.reset(); -// exeu->power.reset(); -// exeu->rt_power.reset(); - - rfu->computeEnergy(is_tdp); - scheu->computeEnergy(is_tdp); - exeu->computeEnergy(is_tdp); - if (coredynp.num_fpus >0) - { - fp_u->computeEnergy(is_tdp); - } - if (coredynp.num_muls >0) - { - mul->computeEnergy(is_tdp); - } - - if (is_tdp) - { - set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t; - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - power = power + mul->power; - } - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction. 
- bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ; - power = power + fp_u->power; - } - - power = power + rfu->power + exeu->power + bypass.power + scheu->power; - } - else - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t; - - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction. - bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - rt_power = rt_power + mul->rt_power; - } - - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t; - rt_power = rt_power + fp_u->rt_power; - } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power; - } + if (!exist) + return; + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (iRRAT) { + delete iRRAT; + iRRAT = 0; + } + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (ifreeL) { + delete ifreeL; + ifreeL = 0; + } + if (idcl) { + delete idcl; + idcl = 0; + } + if (fFRAT) { + delete fFRAT; + fFRAT = 0; + } + if (fRRAT) { + delete fRRAT; + fRRAT = 0; + } + if (fdcl) { + delete fdcl; + fdcl = 0; + } + if (ffreeL) { + delete ffreeL; + ffreeL = 0; + } + if (RAHT) { + delete RAHT; + RAHT = 0; + } } -void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - -// cout 
<< indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? rfu->power.readOp.power_gated_with_long_channel_leakage : rfu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - rfu->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
scheu->power.readOp.power_gated_with_long_channel_leakage : scheu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - scheu->displayEnergy(indent+4,is_tdp); - } - exeu->displayEnergy(indent,is_tdp); - if (coredynp.num_fpus>0) - { - fp_u->displayEnergy(indent,is_tdp); - } - if (coredynp.num_muls >0) - { - mul->displayEnergy(indent,is_tdp); - } - cout << indent_str << "Results Broadcast Bus:" << endl; - cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
bypass.power.readOp.power_gated_with_long_channel_leakage : bypass.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units);//User need to feed a duty cycle to improve accuracy - if (rnu->exist) - { - rnu->power = rnu->power + corepipe->power*pppm_t; - power = power + rnu->power; - } - } - - if (ifu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power*pppm_t; -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; -// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (lsu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - lsu->power = lsu->power + corepipe->power*pppm_t; -// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (exu->exist) - { - set_pppm(pppm_t, 
coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - exu->power = exu->power + corepipe->power*pppm_t; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (mmu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - mmu->power = mmu->power + corepipe->power*pppm_t; -// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - - power = power + undiffCore->power; - - if (XML->sys.Private_L2) - { - - l2cache->computeEnergy(is_tdp); - set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1); - //l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power*pppm_t; - } - - } - else - { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t; - - rt_power = rt_power + rnu->rt_power; - } - } - else - { - num_units = 4.0; - } - - if (ifu->exist) - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = 
coredynp.pipeline_duty_cycle * coredynp.IFU_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.IFU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + ifu->rt_power ; - } - if (lsu->exist) - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.LSU_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.LSU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - - lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + lsu->rt_power; - } - if (exu->exist) - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.ALU_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.ALU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - exu->rt_power = exu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + exu->rt_power; - } - if (mmu->exist) - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * (0.5+0.5*coredynp.LSU_duty_cycle) * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * (0.5+0.5*coredynp.LSU_duty_cycle) * 
coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + mmu->rt_power ; - - } - - rt_power = rt_power + undiffCore->power; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - if (XML->sys.Private_L2) - { - - l2cache->computeEnergy(is_tdp); - //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - //l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; - } - } +MemManU ::~MemManU() { + if (!exist) + return; + if (itlb) { + delete itlb; + itlb = 0; + } + if (dtlb) { + delete dtlb; + dtlb = 0; + } } -void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << "Core:" << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? 
power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout<exist) - { - cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? ifu->power.readOp.power_gated_with_long_channel_leakage : ifu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - ifu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - cout << indent_str<< "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
rnu->power.readOp.power_gated_with_long_channel_leakage : rnu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - rnu->displayEnergy(indent+4,plevel,is_tdp); - } - } - - } - if (lsu->exist) - { - cout << indent_str<< "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? lsu->power.readOp.power_gated_with_long_channel_leakage : lsu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - lsu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (mmu->exist) - { - cout << indent_str<< "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
mmu->power.readOp.power_gated_with_long_channel_leakage : mmu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - mmu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (exu->exist) - { - cout << indent_str<< "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? exu->power.readOp.power_gated_with_long_channel_leakage : exu->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - exu->displayEnergy(indent+4,plevel,is_tdp); - } - } -// if (plevel >2) -// { -// if (undiffCore->exist) -// { -// cout << indent_str << "Undifferentiated Core" << endl; -// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl; -// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl; -//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel? 
undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl; -// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl; -// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; -// cout <sys.Private_L2) - { - - l2cache->displayEnergy(4,is_tdp); - } - - } - else - { -// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl; - } +RegFU ::~RegFU() { + + if (!exist) + return; + if (IRF) { + delete IRF; + 
IRF = 0; + } + if (FRF) { + delete FRF; + FRF = 0; + } + if (RFWIN) { + delete RFWIN; + RFWIN = 0; + } } -InstFetchU ::~InstFetchU(){ - - if (!exist) return; - if(IB) {delete IB; IB = 0;} - if(ID_inst) {delete ID_inst; ID_inst = 0;} - if(ID_operand) {delete ID_operand; ID_operand = 0;} - if(ID_misc) {delete ID_misc; ID_misc = 0;} - if (coredynp.predictionW>0) - { - if(BTB) {delete BTB; BTB = 0;} - if(BPT) {delete BPT; BPT = 0;} - } + +SchedulerU ::~SchedulerU() { + + if (!exist) + return; + if (int_inst_window) { + delete int_inst_window; + int_inst_window = 0; + } + if (fp_inst_window) { + delete fp_inst_window; + fp_inst_window = 0; + } + if (ROB) { + delete ROB; + ROB = 0; + } + if (instruction_selection) { + delete instruction_selection; + instruction_selection = 0; + } +} + +EXECU ::~EXECU() { + + if (!exist) + return; + if (int_bypass) { + delete int_bypass; + int_bypass = 0; + } + if (intTagBypass) { + delete intTagBypass; + intTagBypass = 0; + } + if (int_mul_bypass) { + delete int_mul_bypass; + int_mul_bypass = 0; + } + if (intTag_mul_Bypass) { + delete intTag_mul_Bypass; + intTag_mul_Bypass = 0; + } + if (fp_bypass) { + delete fp_bypass; + fp_bypass = 0; + } + if (fpTagBypass) { + delete fpTagBypass; + fpTagBypass = 0; + } + if (fp_u) { + delete fp_u; + fp_u = 0; + } + if (exeu) { + delete exeu; + exeu = 0; + } + if (mul) { + delete mul; + mul = 0; + } + if (rfu) { + delete rfu; + rfu = 0; + } + if (scheu) { + delete scheu; + scheu = 0; + } } -BranchPredictor ::~BranchPredictor(){ - - if (!exist) return; - if(globalBPT) {delete globalBPT; globalBPT = 0;} - if(localBPT) {delete localBPT; localBPT = 0;} - if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;} - if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;} - if(chooser) {delete chooser; chooser = 0;} - if(RAS) {delete RAS; RAS = 0;} - } - -RENAMINGU ::~RENAMINGU(){ - - if (!exist) return; - if(iFRAT ) {delete iFRAT; iFRAT = 0;} - if(iRRAT) {delete iRRAT; iRRAT = 0;} - if(iFRAT) {delete iFRAT; iFRAT 
= 0;} - if(ifreeL) {delete ifreeL;ifreeL= 0;} - if(idcl) {delete idcl; idcl = 0;} - if(fFRAT ) {delete fFRAT; fFRAT =0;} - if(fRRAT ) {delete fRRAT; fRRAT =0;} - if(fdcl) {delete fdcl; fdcl = 0;} - if(ffreeL) {delete ffreeL;ffreeL= 0;} - if(RAHT) {delete RAHT; RAHT = 0;} - } - -LoadStoreU ::~LoadStoreU(){ - - if (!exist) return; - if(LSQ) {delete LSQ; LSQ = 0;} - if(LoadQ) {delete LoadQ; LoadQ = 0;} +Core ::~Core() { + + if (ifu) { + delete ifu; + ifu = 0; + } + if (lsu) { + delete lsu; + lsu = 0; + } + if (rnu) { + delete rnu; + rnu = 0; + } + if (mmu) { + delete mmu; + mmu = 0; + } + if (exu) { + delete exu; + exu = 0; + } + if (corepipe) { + delete corepipe; + corepipe = 0; + } + if (undiffCore) { + delete undiffCore; + undiffCore = 0; + } + if (l2cache) { + delete l2cache; + l2cache = 0; + } } -MemManU ::~MemManU(){ - - if (!exist) return; - if(itlb) {delete itlb; itlb = 0;} - if(dtlb) {delete dtlb; dtlb = 0;} - } - -RegFU ::~RegFU(){ - - if (!exist) return; - if(IRF) {delete IRF; IRF = 0;} - if(FRF) {delete FRF; FRF = 0;} - if(RFWIN) {delete RFWIN; RFWIN = 0;} - } - -SchedulerU ::~SchedulerU(){ - - if (!exist) return; - if(int_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(fp_inst_window) {delete fp_inst_window; fp_inst_window = 0;} - if(ROB) {delete ROB; ROB = 0;} - if(instruction_selection) {delete instruction_selection;instruction_selection = 0;} - } - -EXECU ::~EXECU(){ - - if (!exist) return; - if(int_bypass) {delete int_bypass; int_bypass = 0;} - if(intTagBypass) {delete intTagBypass; intTagBypass =0;} - if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;} - if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;} - if(fp_bypass) {delete fp_bypass;fp_bypass = 0;} - if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;} - if(fp_u) {delete fp_u;fp_u = 0;} - if(exeu) {delete exeu;exeu = 0;} - if(mul) {delete mul;mul = 0;} - if(rfu) {delete rfu;rfu = 0;} - if(scheu) {delete scheu; scheu = 0;} - } - -Core ::~Core(){ 
- - if(ifu) {delete ifu; ifu = 0;} - if(lsu) {delete lsu; lsu = 0;} - if(rnu) {delete rnu; rnu = 0;} - if(mmu) {delete mmu; mmu = 0;} - if(exu) {delete exu; exu = 0;} - if(corepipe) {delete corepipe; corepipe = 0;} - if(undiffCore) {delete undiffCore;undiffCore = 0;} - if(l2cache) {delete l2cache;l2cache = 0;} - } - -void Core::set_core_param() -{ - coredynp.opt_local = XML->sys.core[ithCore].opt_local; - coredynp.x86 = XML->sys.core[ithCore].x86; - coredynp.Embedded = XML->sys.Embedded; - coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; - coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; - coredynp.fetchW = XML->sys.core[ithCore].fetch_width; - coredynp.decodeW = XML->sys.core[ithCore].decode_width; - coredynp.issueW = XML->sys.core[ithCore].issue_width; - coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; - coredynp.commitW = XML->sys.core[ithCore].commit_width; - coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; - coredynp.predictionW = XML->sys.core[ithCore].prediction_width; - coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; - coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; - coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; - coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; - coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; - coredynp.vdd = XML->sys.core[ithCore].vdd; - coredynp.power_gating_vcc = XML->sys.core[ithCore].power_gating_vcc; - - - - coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; - coredynp.multithreaded = coredynp.num_hthreads>1? 
true:false; - coredynp.hthread_width = int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))); - coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; - coredynp.pc_width = XML->sys.virtual_address_width; - - coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; - coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; - coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; - coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; - coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; - coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; - coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; - coredynp.fp_data_width = coredynp.int_data_width; - coredynp.v_address_width = XML->sys.virtual_address_width; - coredynp.p_address_width = XML->sys.physical_address_width; - - coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; - coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); - coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); - coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; - coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; - coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; - coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; - coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; - coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; - - //Max power duty cycle for peak power estimation -// if (coredynp.core_ty==OOO) -// { -// coredynp.IFU_duty_cycle = 1; -// coredynp.LSU_duty_cycle = 1; -// coredynp.MemManU_I_duty_cycle =1; -// coredynp.MemManU_D_duty_cycle =1; -// coredynp.ALU_duty_cycle =1; -// coredynp.MUL_duty_cycle =1; -// coredynp.FPU_duty_cycle =1; -// coredynp.ALU_cdb_duty_cycle =1; -// coredynp.MUL_cdb_duty_cycle =1; -// 
coredynp.FPU_cdb_duty_cycle =1; -// } -// else -// { - coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; - coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; - coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; - coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; - coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; - coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; - coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; - coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; - coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; - coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; - coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; -// } - - - if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder))) - { - cout<<"Invalid Core Type"<sys.core[ithCore].phy_Regs_IRF_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); - coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size; - coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size; - } - else if (coredynp.scheu_ty==ReservationStation) - {//ROB serves as Phy RF in RS based OOO - coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; - coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; - - } - - } - - int GC_count=XML->sys.core[ithCore].checkpoint_depth;//best check pointing entries for a 4~8 issue OOO should be 8~48;See TR for reference. - if (coredynp.rm_ty ==RAMbased) - { - coredynp.globalCheckpoint = GC_count > 4 ? 
4 : GC_count; //RAM-based RAT cannot have more than 4 GCs; see "a power-aware hybrid ram-cam renaming mechanism for fast recovery" - } - else if(coredynp.rm_ty ==CAMbased) - { - coredynp.globalCheckpoint = GC_count < 1 ? 1 : GC_count; - } - - coredynp.perThreadState = 8; - coredynp.instruction_length = 32; - coredynp.clockRate = XML->sys.core[ithCore].clock_rate; - coredynp.clockRate *= 1e6; - coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false; - coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0); - - //does not care device types, since all core device types are set at sys. level - if (coredynp.vdd > 0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = coredynp.vdd; - interface_ip.lop_Vdd = coredynp.vdd; - interface_ip.lstp_Vdd = coredynp.vdd; - } - - if (coredynp.power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = coredynp.power_gating_vcc; - } +void Core::set_core_param() { + coredynp.opt_local = XML->sys.core[ithCore].opt_local; + coredynp.x86 = XML->sys.core[ithCore].x86; + coredynp.Embedded = XML->sys.Embedded; + coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; + coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; + coredynp.fetchW = XML->sys.core[ithCore].fetch_width; + coredynp.decodeW = XML->sys.core[ithCore].decode_width; + coredynp.issueW = XML->sys.core[ithCore].issue_width; + coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; + coredynp.commitW = XML->sys.core[ithCore].commit_width; + coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; + coredynp.predictionW = XML->sys.core[ithCore].prediction_width; + coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; + 
coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; + coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; + coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; + coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; + coredynp.vdd = XML->sys.core[ithCore].vdd; + coredynp.power_gating_vcc = XML->sys.core[ithCore].power_gating_vcc; + + coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; + coredynp.multithreaded = coredynp.num_hthreads > 1 ? true : false; + coredynp.hthread_width = + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))); + coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; + coredynp.pc_width = XML->sys.virtual_address_width; + + coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; + coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; + coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; + coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; + coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; + coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; + coredynp.int_data_width = int(ceil(XML->sys.machine_bits / 32.0)) * 32; + coredynp.fp_data_width = coredynp.int_data_width; + coredynp.v_address_width = XML->sys.virtual_address_width; + coredynp.p_address_width = XML->sys.physical_address_width; + + coredynp.scheu_ty = + (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; + coredynp.arch_ireg_width = + int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); + coredynp.arch_freg_width = + int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); + coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; + coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; + coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; + coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; + coredynp.busy_cycles = 
XML->sys.core[ithCore].busy_cycles; + coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; + + // Max power duty cycle for peak power estimation + // if (coredynp.core_ty==OOO) + // { + // coredynp.IFU_duty_cycle = 1; + // coredynp.LSU_duty_cycle = 1; + // coredynp.MemManU_I_duty_cycle =1; + // coredynp.MemManU_D_duty_cycle =1; + // coredynp.ALU_duty_cycle =1; + // coredynp.MUL_duty_cycle =1; + // coredynp.FPU_duty_cycle =1; + // coredynp.ALU_cdb_duty_cycle =1; + // coredynp.MUL_cdb_duty_cycle =1; + // coredynp.FPU_cdb_duty_cycle =1; + // } + // else + // { + coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; + coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; + coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; + coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; + coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; + coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; + coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; + coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; + coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; + coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; + coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; + // } + + if (!((coredynp.core_ty == OOO) || (coredynp.core_ty == Inorder))) { + cout << "Invalid Core Type" << endl; + exit(0); + } + // if (coredynp.core_ty==OOO) + // { + // cout<<"OOO processor models are being updated and will be + // available in next release"<sys.core[ithCore].phy_Regs_IRF_size))); + coredynp.phy_freg_width = + int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); + coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = + XML->sys.core[ithCore].phy_Regs_IRF_size; + coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = + XML->sys.core[ithCore].phy_Regs_FRF_size; + } else if (coredynp.scheu_ty == + 
ReservationStation) { // ROB serves as Phy RF in RS based OOO + coredynp.phy_ireg_width = + int(ceil(log2(XML->sys.core[ithCore].ROB_size))); + coredynp.phy_freg_width = + int(ceil(log2(XML->sys.core[ithCore].ROB_size))); + coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; + coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; + } + } + + int GC_count = + XML->sys.core[ithCore] + .checkpoint_depth; // best check pointing entries for a 4~8 issue OOO + // should be 8~48;See TR for reference. + if (coredynp.rm_ty == RAMbased) { + coredynp.globalCheckpoint = + GC_count > 4 ? 4 : GC_count; // RAM-based RAT cannot have more than 4 + // GCs; see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + } else if (coredynp.rm_ty == CAMbased) { + coredynp.globalCheckpoint = GC_count < 1 ? 1 : GC_count; + } + + coredynp.perThreadState = 8; + coredynp.instruction_length = 32; + coredynp.clockRate = XML->sys.core[ithCore].clock_rate; + coredynp.clockRate *= 1e6; + coredynp.regWindowing = (XML->sys.core[ithCore].register_windows_size > 0 && + coredynp.core_ty == Inorder) + ? true + : false; + coredynp.executionTime = XML->sys.total_cycles / coredynp.clockRate; + set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, + coredynp.num_hthreads, 0); + + // does not care device types, since all core device types are set at sys. 
+ // level + if (coredynp.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = coredynp.vdd; + interface_ip.lop_Vdd = coredynp.vdd; + interface_ip.lstp_Vdd = coredynp.vdd; + } + + if (coredynp.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = coredynp.power_gating_vcc; + } } diff --git a/core.h b/core.h index cbcfa95..f81cf2d 100644 --- a/core.h +++ b/core.h @@ -29,234 +29,234 @@ * ***************************************************************************/ - #ifndef CORE_H_ #define CORE_H_ #include "XML_Parse.h" -#include "logic.h" -#include "parameter.h" #include "array.h" -#include "interconnect.h" #include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" #include "sharedcache.h" -class BranchPredictor :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * globalBPT; - ArrayST * localBPT; - ArrayST * L1_localBPT; - ArrayST * L2_localBPT; - ArrayST * chooser; - ArrayST * RAS; - bool exist; - - BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~BranchPredictor(); +class BranchPredictor : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + ArrayST *globalBPT; + ArrayST *localBPT; + ArrayST *L1_localBPT; + ArrayST *L2_localBPT; + ArrayST *chooser; + ArrayST *RAS; + bool exist; + + BranchPredictor(ParseXML *XML_interface, int ithCore_, + InputParameter 
*interface_ip_, const CoreDynParam &dyn_p_, + bool exsit = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~BranchPredictor(); }; - -class InstFetchU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Cache_policy cache_p; - InstCache icache; - ArrayST * IB; - ArrayST * BTB; - BranchPredictor * BPT; - inst_decoder * ID_inst; - inst_decoder * ID_operand; - inst_decoder * ID_misc; - bool exist; - - InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~InstFetchU(); +class InstFetchU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + enum Cache_policy cache_p; + InstCache icache; + ArrayST *IB; + ArrayST *BTB; + BranchPredictor *BPT; + inst_decoder *ID_inst; + inst_decoder *ID_operand; + inst_decoder *ID_misc; + bool exist; + + InstFetchU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exsit = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~InstFetchU(); }; - -class SchedulerU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double Iw_height, fp_Iw_height,ROB_height; - ArrayST * int_inst_window; - ArrayST * fp_inst_window; - ArrayST * ROB; - selection_logic * 
instruction_selection; - bool exist; - - SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~SchedulerU(); +class SchedulerU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double Iw_height, fp_Iw_height, ROB_height; + ArrayST *int_inst_window; + ArrayST *fp_inst_window; + ArrayST *ROB; + selection_logic *instruction_selection; + bool exist; + + SchedulerU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~SchedulerU(); }; -class RENAMINGU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - CoreDynParam coredynp; - ArrayST * iFRAT; - ArrayST * fFRAT; - ArrayST * iRRAT; - ArrayST * fRRAT; - ArrayST * ifreeL; - ArrayST * ffreeL; - dep_resource_conflict_check * idcl; - dep_resource_conflict_check * fdcl; - ArrayST * RAHT;//register alias history table Used to store GC - bool exist; - - - RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RENAMINGU(); +class RENAMINGU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate, executionTime; + CoreDynParam coredynp; + ArrayST *iFRAT; + ArrayST *fFRAT; + ArrayST *iRRAT; + ArrayST *fRRAT; + ArrayST *ifreeL; + ArrayST *ffreeL; + 
dep_resource_conflict_check *idcl; + dep_resource_conflict_check *fdcl; + ArrayST *RAHT; // register alias history table Used to store GC + bool exist; + + RENAMINGU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~RENAMINGU(); }; -class LoadStoreU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - enum Cache_policy cache_p; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - DataCache dcache; - ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - ArrayST * LoadQ; - bool exist; - - LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~LoadStoreU(); +class LoadStoreU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + enum Cache_policy cache_p; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; + DataCache dcache; + ArrayST *LSQ; // it is actually the store queue but for inorder processors it + // serves as both loadQ and StoreQ + ArrayST *LoadQ; + bool exist; + + LoadStoreU(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~LoadStoreU(); }; -class MemManU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam 
coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * itlb; - ArrayST * dtlb; - bool exist; - - MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MemManU(); +class MemManU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + ArrayST *itlb; + ArrayST *dtlb; + bool exist; + + MemManU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MemManU(); }; -class RegFU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double int_regfile_height, fp_regfile_height; - ArrayST * IRF; - ArrayST * FRF; - ArrayST * RFWIN; - bool exist; - - RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RegFU(); +class RegFU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double int_regfile_height, fp_regfile_height; + ArrayST *IRF; + ArrayST *FRF; + ArrayST *RFWIN; + bool exist; + + RegFU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, 
bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~RegFU(); }; -class EXECU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - CoreDynParam coredynp; - RegFU * rfu; - SchedulerU * scheu; - FunctionalUnit * fp_u; - FunctionalUnit * exeu; - FunctionalUnit * mul; - interconnect * int_bypass; - interconnect * intTagBypass; - interconnect * int_mul_bypass; - interconnect * intTag_mul_Bypass; - interconnect * fp_bypass; - interconnect * fpTagBypass; - - Component bypass; - bool exist; - - EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~EXECU(); +class EXECU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; + CoreDynParam coredynp; + RegFU *rfu; + SchedulerU *scheu; + FunctionalUnit *fp_u; + FunctionalUnit *exeu; + FunctionalUnit *mul; + interconnect *int_bypass; + interconnect *intTagBypass; + interconnect *int_mul_bypass; + interconnect *intTag_mul_Bypass; + interconnect *fp_bypass; + interconnect *fpTagBypass; + + Component bypass; + bool exist; + + EXECU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, + double lsq_height_, const CoreDynParam &dyn_p_, bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~EXECU(); }; - -class Core :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double 
clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - InstFetchU * ifu; - LoadStoreU * lsu; - MemManU * mmu; - EXECU * exu; - RENAMINGU * rnu; - Pipeline * corepipe; - UndiffCore * undiffCore; - SharedCache * l2cache; - CoreDynParam coredynp; - //full_decoder inst_decoder; - //clock_network clockNetwork; - Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_); - void set_core_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~Core(); +class Core : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + InstFetchU *ifu; + LoadStoreU *lsu; + MemManU *mmu; + EXECU *exu; + RENAMINGU *rnu; + Pipeline *corepipe; + UndiffCore *undiffCore; + SharedCache *l2cache; + CoreDynParam coredynp; + // full_decoder inst_decoder; + // clock_network clockNetwork; + Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_); + void set_core_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~Core(); }; #endif /* CORE_H_ */ diff --git a/globalvar.h b/globalvar.h index 9532576..e0b76c2 100644 --- a/globalvar.h +++ b/globalvar.h @@ -29,11 +29,10 @@ * ***************************************************************************/ - #ifndef GLOBALVAR_H_ #define GLOBALVAR_H_ -#ifdef GLOBALVAR +#ifdef GLOBALVAR #define EXTERN #else #define EXTERN extern @@ -42,7 +41,3 @@ EXTERN bool opt_for_clk; #endif /* GLOBALVAR_H_ */ - - - - diff --git a/interconnect.cc b/interconnect.cc index affe59b..19a5847 100644 --- a/interconnect.cc +++ b/interconnect.cc @@ -29,63 +29,43 @@ * ***************************************************************************/ - #include "interconnect.h" + +#include "globalvar.h" #include "wire.h" + #include #include -#include 
"globalvar.h" -interconnect::interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, - int data_w, double len,const InputParameter *configure_interface, - int start_wiring_level_, - bool pipelinable_ , - double route_over_perc_ , - bool opt_local_, - enum Core_type core_ty_, - enum Wire_type wire_model, - double width_s, double space_s, - TechnologyParameter::DeviceType *dt -) - :name(name_), - device_ty(device_ty_), - in_rise_time(0), - out_rise_time(0), - base_width(base_w), - base_height(base_h), - data_width(data_w), - wt(wire_model), - width_scaling(width_s), - space_scaling(space_s), - start_wiring_level(start_wiring_level_), - length(len), - //interconnect_latency(1e-12), - //interconnect_throughput(1e-12), - opt_local(opt_local_), - core_ty(core_ty_), - pipelinable(pipelinable_), - route_over_perc(route_over_perc_), - deviceType(dt) -{ +interconnect::interconnect(string name_, enum Device_ty device_ty_, + double base_w, double base_h, int data_w, double len, + const InputParameter *configure_interface, + int start_wiring_level_, bool pipelinable_, + double route_over_perc_, bool opt_local_, + enum Core_type core_ty_, enum Wire_type wire_model, + double width_s, double space_s, + TechnologyParameter::DeviceType *dt) + : name(name_), device_ty(device_ty_), in_rise_time(0), out_rise_time(0), + base_width(base_w), base_height(base_h), data_width(data_w), + wt(wire_model), width_scaling(width_s), space_scaling(space_s), + start_wiring_level(start_wiring_level_), length(len), + // interconnect_latency(1e-12), + // interconnect_throughput(1e-12), + opt_local(opt_local_), core_ty(core_ty_), pipelinable(pipelinable_), + route_over_perc(route_over_perc_), deviceType(dt) { wt = Global; - l_ip=*configure_interface; + l_ip = *configure_interface; local_result = init_interface(&l_ip); - - max_unpipelined_link_delay = 0; //TODO + max_unpipelined_link_delay = 0; // TODO min_w_nmos = g_tp.min_w_nmos_; min_w_pmos = 
deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - - - latency = l_ip.latency; - throughput = l_ip.throughput; - latency_overflow=false; - throughput_overflow=false; + latency = l_ip.latency; + throughput = l_ip.throughput; + latency_overflow = false; + throughput_overflow = false; /* * TODO: Add wiring option from semi-global to global automatically @@ -96,67 +76,62 @@ interconnect::interconnect( * not have fat wires. */ if (pipelinable == false) - //Non-pipelinable wires, such as bypass logic, care latency + // Non-pipelinable wires, such as bypass logic, care latency { - compute(); - if (opt_for_clk && opt_local) - { - while (delay > latency && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > latency) - { - latency_overflow=true; - } - } - } - else //Pipelinable wires, such as bus, does not care latency but throughput + compute(); + if (opt_for_clk && opt_local) { + while (delay > latency && width_scaling < 3.0) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + compute(); + } + if (delay > latency) { + latency_overflow = true; + } + } + } else // Pipelinable wires, such as bus, does not care latency but throughput { - /* - * TODO: Add pipe regs power, area, and timing; - * Pipelinable wires optimize latency first. - */ - compute(); - if (opt_for_clk && opt_local) - { - while (delay > throughput && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > throughput) - // insert pipeline stages - { - num_pipe_stages = (int)ceil(delay/throughput); - assert(num_pipe_stages>0); - delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay; - } - } + /* + * TODO: Add pipe regs power, area, and timing; + * Pipelinable wires optimize latency first. 
+ */ + compute(); + if (opt_for_clk && opt_local) { + while (delay > throughput && width_scaling < 3.0) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + compute(); + } + if (delay > throughput) + // insert pipeline stages + { + num_pipe_stages = (int)ceil(delay / throughput); + assert(num_pipe_stages > 0); + delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay; + } + } } power_bit = power; power.readOp.dynamic *= data_width; power.readOp.leakage *= data_width; power.readOp.gate_leakage *= data_width; - area.set_area(area.get_area()*data_width); + area.set_area(area.get_area() * data_width); no_device_under_wire_area.h *= data_width; - if (latency_overflow==true) - cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl; - + if (latency_overflow == true) + cout << "Warning: " << name + << " wire structure cannot satisfy latency constraint." << endl; assert(power.readOp.dynamic > 0); assert(power.readOp.leakage > 0); assert(power.readOp.gate_leakage > 0); - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - double pg_reduction = power_gating_leakage_reduction(false);// + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + double pg_reduction = power_gating_leakage_reduction(false); // double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; @@ -164,25 +139,22 @@ interconnect::interconnect( power.searchOp.dynamic *= sckRation; power.readOp.longer_channel_leakage = - power.readOp.leakage*long_channel_device_reduction; + power.readOp.leakage * long_channel_device_reduction; - power.readOp.power_gated_leakage = - power.readOp.leakage*pg_reduction; + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; power.readOp.power_gated_with_long_channel_leakage = - power.readOp.power_gated_leakage*long_channel_device_reduction; + power.readOp.power_gated_leakage * 
long_channel_device_reduction; - if (pipelinable)//Only global wires has the option to choose whether routing over or not - area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc)); + if (pipelinable) // Only global wires has the option to choose whether routing + // over or not + area.set_area(area.get_area() * route_over_perc + + no_device_under_wire_area.get_area() * (1 - route_over_perc)); Wire wreset(); } - - -void -interconnect::compute() -{ +void interconnect::compute() { Wire *wtemp1 = 0; wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); @@ -192,17 +164,17 @@ interconnect::compute() power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage; area.set_area(wtemp1->area.get_area()); - no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); + no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); no_device_under_wire_area.w = length; if (wtemp1) - delete wtemp1; - + delete wtemp1; } -void interconnect::leakage_feedback(double temperature)//TODO: add code for processing power gating +void interconnect::leakage_feedback( + double temperature) // TODO: add code for processing power gating { - l_ip.temp = (unsigned int)round(temperature/10.0)*10; + l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&l_ip); // init_result is dummy compute(); @@ -216,13 +188,14 @@ void interconnect::leakage_feedback(double temperature)//TODO: add code for proc assert(power.readOp.leakage > 0); assert(power.readOp.gate_leakage > 0); - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; power.writeOp.dynamic *= sckRation; power.searchOp.dynamic *= sckRation; - power.readOp.longer_channel_leakage = 
power.readOp.leakage*long_channel_device_reduction; + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; } - diff --git a/interconnect.h b/interconnect.h index c3644f7..972f449 100644 --- a/interconnect.h +++ b/interconnect.h @@ -29,83 +29,72 @@ * ***************************************************************************/ - #ifndef __INTERCONNECT_H__ #define __INTERCONNECT_H__ +#include "assert.h" #include "basic_circuit.h" #include "basic_components.h" +#include "cacti_interface.h" #include "component.h" #include "parameter.h" -#include "assert.h" #include "subarray.h" -#include "cacti_interface.h" #include "wire.h" // leakge power includes entire htree in a bank (when uca_tree == false) // leakge power includes only part to one bank when uca_tree == true -class interconnect : public Component -{ - public: - interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, int data_w, double len, - const InputParameter *configure_interface, int start_wiring_level_, - bool pipelinable_ = false, - double route_over_perc_ =0.5, - bool opt_local_=true, - enum Core_type core_ty_=Inorder, - enum Wire_type wire_model=Global, - double width_s=1.0, double space_s=1.0, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ); +class interconnect : public Component { +public: + interconnect(string name_, enum Device_ty device_ty_, double base_w, + double base_h, int data_w, double len, + const InputParameter *configure_interface, + int start_wiring_level_, bool pipelinable_ = false, + double route_over_perc_ = 0.5, bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, + enum Wire_type wire_model = Global, double width_s = 1.0, + double space_s = 1.0, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); - ~interconnect() {}; + ~interconnect(){}; - void compute(); - string name; - enum Device_ty device_ty; - double in_rise_time, out_rise_time; - InputParameter l_ip; - uca_org_t 
local_result; - Area no_device_under_wire_area; - void set_in_rise_time(double rt) - { - in_rise_time = rt; - } - - void leakage_feedback(double temperature); - double max_unpipelined_link_delay; - powerDef power_bit; + void compute(); + string name; + enum Device_ty device_ty; + double in_rise_time, out_rise_time; + InputParameter l_ip; + uca_org_t local_result; + Area no_device_under_wire_area; + void set_in_rise_time(double rt) { in_rise_time = rt; } - double wire_bw; - double init_wire_bw; // bus width at root - double base_width; - double base_height; - int data_width; - enum Wire_type wt; - double width_scaling, space_scaling; - int start_wiring_level; - double length; - double min_w_nmos; - double min_w_pmos; - double latency, throughput; - bool latency_overflow; - bool throughput_overflow; - double interconnect_latency; - double interconnect_throughput; - bool opt_local; - enum Core_type core_ty; - bool pipelinable; - double route_over_perc; - int num_pipe_stages; + void leakage_feedback(double temperature); + double max_unpipelined_link_delay; + powerDef power_bit; - private: - TechnologyParameter::DeviceType *deviceType; + double wire_bw; + double init_wire_bw; // bus width at root + double base_width; + double base_height; + int data_width; + enum Wire_type wt; + double width_scaling, space_scaling; + int start_wiring_level; + double length; + double min_w_nmos; + double min_w_pmos; + double latency, throughput; + bool latency_overflow; + bool throughput_overflow; + double interconnect_latency; + double interconnect_throughput; + bool opt_local; + enum Core_type core_ty; + bool pipelinable; + double route_over_perc; + int num_pipe_stages; +private: + TechnologyParameter::DeviceType *deviceType; }; #endif - diff --git a/iocontrollers.cc b/iocontrollers.cc index 40b1198..1a5d8b2 100644 --- a/iocontrollers.cc +++ b/iocontrollers.cc @@ -28,20 +28,21 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * 
***************************************************************************/ -#include "io.h" -#include "parameter.h" +#include "iocontrollers.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" #include "const.h" +#include "io.h" #include "logic.h" -#include "basic_circuit.h" -#include +#include "parameter.h" + #include -#include "XML_Parse.h" -#include -#include #include -#include "iocontrollers.h" -#include "basic_components.h" - +#include +#include +#include /* SUN Niagara 2 I/O power analysis: @@ -52,462 +53,528 @@ PCIe bits: (8 + 8)*2 = 32 Debug I/Os: 168 Other I/Os: 711- 32-32 - 384 - 168 = 95 -According to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC Server on a Chip" -90% of I/Os are SerDers (the calucaltion is 384+64/(711-168)=83% about the same as the 90% reported in the paper) +According to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC +Server on a Chip" 90% of I/Os are SerDers (the calucaltion is +384+64/(711-168)=83% about the same as the 90% reported in the paper) --> around 80Pins are common I/Os. Common I/Os consumes 71mW/Gb/s according to Cadence ChipEstimate @65nm -Niagara 2 I/O clock is 1/4 of core clock. --> 87pin (<--((711-168)*17%)) * 71mW/Gb/s *0.25*1.4Ghz = 2.17W +Niagara 2 I/O clock is 1/4 of core clock. --> 87pin (<--((711-168)*17%)) * +71mW/Gb/s *0.25*1.4Ghz = 2.17W -Total dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 - 2.17 = 8.98 -Further, if assuming I/O logic power is about 50% of I/Os then Total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89 +Total dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 +- 2.17 = 8.98 Further, if assuming I/O logic power is about 50% of I/Os then +Total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89 */ /* - * A bug in Cadence ChipEstimator: After update the clock rate in the clock tab, a user - * need to re-select the IP clock (the same clk) and then click Estimate. 
if not reselect - * the new clock rate may not be propogate into the IPs. + * A bug in Cadence ChipEstimator: After update the clock rate in the clock tab, + * a user need to re-select the IP clock (the same clk) and then click Estimate. + * if not reselect the new clock rate may not be propogate into the IPs. * */ -NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - - - double frontend_area, phy_area, mac_area, SerDer_area; - double frontend_dyn, mac_dyn, SerDer_dyn; - double frontend_gates, mac_gates, SerDer_gates = 0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - set_niu_param(); - local_result = init_interface(&interface_ip); - - if (niup.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm. - mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" - //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS - frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. - //SerDer is very hard to scale - SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - //Cadence ChipEstimate using 65nm - mac_gates = 111700; - frontend_gates = 320000; - SerDer_gates = 200000; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - - } - else - {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect - // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not - // a simple summation of all IPs. Ignore this effect - mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer - SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" - //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm - SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - mac_gates = 111700; - frontend_gates = 52000; - SerDer_gates = 199260; - - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - - power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; - power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - double pg_reduction = power_gating_leakage_reduction(false); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = power_t.readOp.power_gated_leakage * long_channel_device_reduction; - - power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } - -void NIUController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - - - power = power_t; - power.readOp.dynamic *= niup.duty_cycle; - - } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= niup.perc_load; - } +NIUController::NIUController(ParseXML *XML_interface, + InputParameter *interface_ip_) + : XML(XML_interface), 
interface_ip(*interface_ip_) { + + double frontend_area, phy_area, mac_area, SerDer_area; + double frontend_dyn, mac_dyn, SerDer_dyn; + double frontend_gates, mac_gates, SerDer_gates = 0.; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing, PMOS_sizing; + + set_niu_param(); + local_result = init_interface(&interface_ip); + + if (niup.type == 0) // high performance NIU + { + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate using 65nm. + mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, ISSCC "An + // 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" and"A 1.2-V-Only 900-mW + // 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning + // Technique" Frontend is PCS + frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * + (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate hard IP @65nm. SerDer is very hard to scale + SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um / + 0.065); //* (interface_ip.F_sz_um/0.065); + phy_area = frontend_area + SerDer_area; + // total area + area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); + // Power + // Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = + // P/F = 1.37/1Ghz = 1.37e-9); + mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * + (interface_ip.F_sz_nm / + 65.0); // niup.clockRate; //2.19W@1GHz fully active according to + // Cadence ChipEstimate @65nm + // Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0); // niup.clockRate; + // according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." 
ISSCC 2006 + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + SerDer_dyn /= + niup.clockRate; // covert to energy per clock cycle of whole NIU + + // Cadence ChipEstimate using 65nm + mac_gates = 111700; + frontend_gates = 320000; + SerDer_gates = 200000; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + + } else { // Low power implementations are mostly from Cadence ChipEstimator; + // Ignore the multiple IP effect + // ---When there are multiple IP (same kind or not) selected, Cadence + // ChipEstimator results are not a simple summation of all IPs. Ignore this + // effect + mac_area = + 0.24 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); // Frontend is the PCS layer + SerDer_area = + 0.35 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + // Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet + // Transceiver and XAUI Interface With Robust VCO Tuning Technique" and the + // ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly + // with the technology total area + area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); + // Power + // Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
E=P*T = + // P/F = 1.37/1Ghz = 1.37e-9); + mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * + (interface_ip.F_sz_nm / + 65.0); // niup.clockRate; //2.19W@1GHz fully active according to + // Cadence ChipEstimate @65nm + // Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0); // niup.clockRate; + // SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm + SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + SerDer_dyn /= + niup.clockRate; // covert to energy per clock cycle of whole NIU + + mac_gates = 111700; + frontend_gates = 52000; + SerDer_gates = 199260; + + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } + + power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; + power_t.readOp.leakage = + (mac_gates + frontend_gates + frontend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; + + power_t.readOp.gate_leakage = + (mac_gates + frontend_gates + frontend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W } -void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - 
- if (is_tdp) - { - cout << "NIU:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl; - cout<sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << "NIU:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * niup.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic * niup.clockRate + << " W" << endl; + cout << endl; + } else { + } } -void NIUController::set_niu_param() -{ - niup.clockRate = XML->sys.niu.clockrate; - niup.clockRate *= 1e6; - niup.num_units = XML->sys.niu.number_units; - niup.duty_cycle = XML->sys.niu.duty_cycle; - niup.perc_load = XML->sys.niu.total_load_perc; - niup.type = XML->sys.niu.type; - if ( XML->sys.niu.vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.niu.vdd; - interface_ip.lop_Vdd = XML->sys.niu.vdd; - interface_ip.lstp_Vdd = XML->sys.niu.vdd; - } - - if ( XML->sys.niu.power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.niu.power_gating_vcc; - - } -// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); +void NIUController::set_niu_param() { + niup.clockRate = XML->sys.niu.clockrate; + niup.clockRate *= 1e6; + niup.num_units = XML->sys.niu.number_units; + niup.duty_cycle = XML->sys.niu.duty_cycle; + niup.perc_load = XML->sys.niu.total_load_perc; + niup.type = XML->sys.niu.type; + if (XML->sys.niu.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.niu.vdd; + interface_ip.lop_Vdd = XML->sys.niu.vdd; + interface_ip.lstp_Vdd = XML->sys.niu.vdd; + } + + if (XML->sys.niu.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.niu.power_gating_vcc; + } + // niup.executionTime = + // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); } 
-PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates=0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - set_pcie_param(); - local_result = init_interface(&interface_ip); - - if (pciep.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm. - ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. - frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. 
- //SerDer is very hard to scale - SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle - - //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; - //Cadence ChipEstimate using 65nm - ctrl_gates = 900000/8*pciep.num_channels; - // frontend_gates = 120000/8; - // SerDer_gates = 200000/8; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - } - else - { - ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. 
- SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle - - //Cadence ChipEstimate using 65nm - ctrl_gates = 200000/8*pciep.num_channels; - // frontend_gates = 120000/8; - SerDer_gates = 200000/8*pciep.num_channels; - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6); - power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels; - power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - double pg_reduction = power_gating_leakage_reduction(false); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = power_t.readOp.power_gated_leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? 
SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } - -void PCIeController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - - - power = power_t; - power.readOp.dynamic *= pciep.duty_cycle; - - } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= pciep.perc_load; - } +PCIeController::PCIeController(ParseXML *XML_interface, + InputParameter *interface_ip_) + : XML(XML_interface), interface_ip(*interface_ip_) { + + double frontend_area, phy_area, ctrl_area, SerDer_area; + double ctrl_dyn, frontend_dyn, SerDer_dyn; + double ctrl_gates, frontend_gates, SerDer_gates = 0.; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing, PMOS_sizing; + + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ + set_pcie_param(); + local_result = init_interface(&interface_ip); + + if (pciep.type == 0) // high performance NIU + { + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate @ 65nm. + ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, and Cadence + // ChipEstimate @ 65nm. + frontend_area = (5.2 + 0.1) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate hard IP @65nm. 
SerDer is very hard to scale + SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um / + 0.065); //* (interface_ip.F_sz_um/0.065); + phy_area = frontend_area + SerDer_area; + // total area + // Power + // Cadence ChipEstimate using 65nm the controller includes everything: the + // PHY, the data link and transaction layer + ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = + // 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / + 1.2; // PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle + + // power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; + // Cadence ChipEstimate using 65nm + ctrl_gates = 900000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + // SerDer_gates = 200000/8; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + ctrl_area = + 0.412 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, and Cadence + // ChipEstimate @ 65nm. 
+ SerDer_area = + 0.36 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + // total area + // Power + // Cadence ChipEstimate using 65nm the controller includes everything: the + // PHY, the data link and transaction layer + ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = + // 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / + 1.2; // PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle + + // Cadence ChipEstimate using 65nm + ctrl_gates = 200000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + SerDer_gates = 200000 / 8 * pciep.num_channels; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } + area.set_area(((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 * + pciep.num_channels) * + 1e6); + power_t.readOp.dynamic = + (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * pciep.num_channels; + power_t.readOp.leakage = + (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; + power_t.readOp.gate_leakage = + (ctrl_gates + (pciep.withPHY ? 
SerDer_gates : 0)) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W } -void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << "PCIe:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout<sys.pcie.clockrate; - pciep.clockRate *= 1e6; - pciep.num_units = XML->sys.pcie.number_units; - pciep.num_channels = XML->sys.pcie.num_channels; - pciep.duty_cycle = XML->sys.pcie.duty_cycle; - pciep.perc_load = XML->sys.pcie.total_load_perc; - pciep.type = XML->sys.pcie.type; - pciep.withPHY = XML->sys.pcie.withPHY; - - if ( XML->sys.pcie.vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.pcie.vdd; - interface_ip.lop_Vdd = XML->sys.pcie.vdd; - interface_ip.lstp_Vdd = XML->sys.pcie.vdd; - } - - if ( XML->sys.pcie.power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.pcie.power_gating_vcc; - - } -// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); 
+void PCIeController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << "PCIe:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * pciep.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic * pciep.clockRate + << " W" << endl; + cout << endl; + } else { + } +} +void PCIeController::set_pcie_param() { + pciep.clockRate = XML->sys.pcie.clockrate; + pciep.clockRate *= 1e6; + pciep.num_units = XML->sys.pcie.number_units; + pciep.num_channels = XML->sys.pcie.num_channels; + pciep.duty_cycle = XML->sys.pcie.duty_cycle; + pciep.perc_load = XML->sys.pcie.total_load_perc; + pciep.type = XML->sys.pcie.type; + pciep.withPHY = XML->sys.pcie.withPHY; + + if (XML->sys.pcie.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.pcie.vdd; + interface_ip.lop_Vdd = XML->sys.pcie.vdd; + interface_ip.lstp_Vdd = XML->sys.pcie.vdd; + } + + if (XML->sys.pcie.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.pcie.power_gating_vcc; + } + // pciep.executionTime = + // 
XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); } -FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates=0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - - set_fc_param(); - local_result = init_interface(&interface_ip); - if (fcp.type == 0) //high performance NIU - { - cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << "Flash Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? 
power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl; - cout<sys.flashc.mc_clock; -// fcp.clockRate *= 1e6; - fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; - fcp.num_channels = ceil(fcp.peakDataTransferRate/200); - fcp.num_mcs = XML->sys.flashc.number_mcs; - fcp.duty_cycle = XML->sys.flashc.duty_cycle; - fcp.perc_load = XML->sys.flashc.total_load_perc; - fcp.type = XML->sys.flashc.type; - fcp.withPHY = XML->sys.flashc.withPHY; -// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - if ( XML->sys.flashc.vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.flashc.vdd; - interface_ip.lop_Vdd = XML->sys.flashc.vdd; - interface_ip.lstp_Vdd = XML->sys.flashc.vdd; - } - if ( XML->sys.flashc.power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; - - } +void FlashController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << "Flash Controller:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" + << endl; // no multiply of clock since this is power already + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? 
power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic + << " W" << endl; + cout << endl; + } else { + } +} +void FlashController::set_fc_param() { + // fcp.clockRate = XML->sys.flashc.mc_clock; + // fcp.clockRate *= 1e6; + fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; + fcp.num_channels = ceil(fcp.peakDataTransferRate / 200); + fcp.num_mcs = XML->sys.flashc.number_mcs; + fcp.duty_cycle = XML->sys.flashc.duty_cycle; + fcp.perc_load = XML->sys.flashc.total_load_perc; + fcp.type = XML->sys.flashc.type; + fcp.withPHY = XML->sys.flashc.withPHY; + // flashcp.executionTime = + // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); + if (XML->sys.flashc.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.flashc.vdd; + interface_ip.lop_Vdd = XML->sys.flashc.vdd; + interface_ip.lstp_Vdd = XML->sys.flashc.vdd; + } + if (XML->sys.flashc.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; + } } diff --git a/iocontrollers.h b/iocontrollers.h index 0629af3..1c99d4f 100644 --- a/iocontrollers.h +++ b/iocontrollers.h @@ -31,7 +31,6 @@ #ifndef IOCONTROLLERS_H_ #define IOCONTROLLERS_H_ - #endif /* IOCONTROLLERS_H_ */ #include "XML_Parse.h" @@ -39,48 +38,48 @@ //#include "io.h" #include "array.h" //#include "Undifferentiated_Core_Area.h" -#include #include "basic_components.h" +#include + class NIUController : public Component { - public: - ParseXML *XML; - 
InputParameter interface_ip; - NIUParam niup; - powerDef power_t; - uca_org_t local_result; - NIUController(ParseXML *XML_interface,InputParameter* interface_ip_); - void set_niu_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~NIUController(){}; +public: + ParseXML *XML; + InputParameter interface_ip; + NIUParam niup; + powerDef power_t; + uca_org_t local_result; + NIUController(ParseXML *XML_interface, InputParameter *interface_ip_); + void set_niu_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~NIUController(){}; }; class PCIeController : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - PCIeParam pciep; - powerDef power_t; - uca_org_t local_result; - PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_); - void set_pcie_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~PCIeController(){}; +public: + ParseXML *XML; + InputParameter interface_ip; + PCIeParam pciep; + powerDef power_t; + uca_org_t local_result; + PCIeController(ParseXML *XML_interface, InputParameter *interface_ip_); + void set_pcie_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~PCIeController(){}; }; class FlashController : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - MCParam fcp; - powerDef power_t; - uca_org_t local_result; - FlashController(ParseXML *XML_interface,InputParameter* interface_ip_); - void set_fc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~FlashController(){}; +public: + ParseXML *XML; + InputParameter interface_ip; + MCParam fcp; + powerDef power_t; + uca_org_t local_result; + 
FlashController(ParseXML *XML_interface, InputParameter *interface_ip_); + void set_fc_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~FlashController(){}; }; - diff --git a/logic.cc b/logic.cc index dc64779..8ae826e 100644 --- a/logic.cc +++ b/logic.cc @@ -31,223 +31,247 @@ #include "logic.h" +// selection_logic +selection_logic::selection_logic(bool _is_default, int win_entries_, + int issue_width_, + const InputParameter *configure_interface, + enum Device_ty device_ty_, + enum Core_type core_ty_) + // const ParseXML *_XML_interface) + : is_default(_is_default), win_entries(win_entries_), + issue_width(issue_width_), device_ty(device_ty_), core_ty(core_ty_) { + // uca_org_t result2; + l_ip = *configure_interface; + local_result = init_interface(&l_ip); + // init_tech_params(l_ip.F_sz_um, false); + // win_entries=numIBEntries;//IQentries; + // issue_width=issueWidth; + selection_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; +} -//selection_logic -selection_logic::selection_logic( - bool _is_default, - int win_entries_, - int issue_width_, - const InputParameter *configure_interface, - enum Device_ty device_ty_, - enum Core_type core_ty_) - //const ParseXML *_XML_interface) - :is_default(_is_default), - win_entries(win_entries_), - issue_width(issue_width_), - device_ty(device_ty_), - core_ty(core_ty_) - { - 
//uca_org_t result2; - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - //init_tech_params(l_ip.F_sz_um, false); - //win_entries=numIBEntries;//IQentries; - //issue_width=issueWidth; - selection_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - } - -void selection_logic::selection_power() -{//based on cost effective superscalar processor TR pp27-31 +void selection_logic::selection_power() { // based on cost effective superscalar + // processor TR pp27-31 double Ctotal, Cor, Cpencode; int num_arbiter; double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; - //TODO: the 0.8um process data is used. 
- WSelORn = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - WSelORprequ = 50 * l_ip.F_sz_um;//this was 40 micron for the 0.8 micron process - WSelPn = 12.5 * l_ip.F_sz_um;//this was 10mcron for the 0.8 micron process - WSelPp = 18.75 * l_ip.F_sz_um;//this was 15 micron for the 0.8 micron process - WSelEnn = 6.25 * l_ip.F_sz_um;//this was 5 micron for the 0.8 micron process - WSelEnp = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - - - Ctotal=0; - num_arbiter=1; - while(win_entries > 4) - { - win_entries = (int)ceil((double)win_entries / 4.0); - num_arbiter += win_entries; - } - //the 4-input OR logic to generate anyreq - Cor = 4 * drain_C_(WSelORn,NCH,1,1, g_tp.cell_h_def) + drain_C_(WSelORprequ,PCH,1,1, g_tp.cell_h_def); - power.readOp.gate_leakage = cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor)*g_tp.peri_global.Vdd; - - //The total capacity of the 4-bit priority encoder - Cpencode = drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,1, 1, g_tp.cell_h_def) + - 2*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,2, 1, g_tp.cell_h_def) + - 3*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,3, 1, g_tp.cell_h_def) + - 4*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,4, 1, g_tp.cell_h_def) +//precompute priority logic - 2*4*gate_C(WSelEnn+WSelEnp,20.0)+ - 4*drain_C_(WSelEnn,NCH,1, 1, g_tp.cell_h_def) + 2*4*drain_C_(WSelEnp,PCH,1, 1, g_tp.cell_h_def)+//enable logic - (2*4+2*3+2*2+2)*gate_C(WSelPn+WSelPp,10.0);//requests signal - - Ctotal += issue_width * num_arbiter*(Cor+Cpencode); - - power.readOp.dynamic = Ctotal*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*2;//2 means the abitration signal need to travel round trip - power.readOp.leakage = issue_width * num_arbiter * - (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Isub_leakage(WSelPn, WSelPp, 4, 
nor)//grant3p - + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals - )*g_tp.peri_global.Vdd; - power.readOp.gate_leakage = issue_width * num_arbiter * - (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p - + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals - )*g_tp.peri_global.Vdd; + // TODO: the 0.8um process data is used. + WSelORn = 12.5 * l_ip.F_sz_um; // this was 10 micron for the 0.8 micron + // process + WSelORprequ = + 50 * l_ip.F_sz_um; // this was 40 micron for the 0.8 micron process + WSelPn = 12.5 * l_ip.F_sz_um; // this was 10mcron for the 0.8 micron process + WSelPp = 18.75 * l_ip.F_sz_um; // this was 15 micron for the 0.8 micron + // process + WSelEnn = 6.25 * l_ip.F_sz_um; // this was 5 micron for the 0.8 micron process + WSelEnp = 12.5 * l_ip.F_sz_um; // this was 10 micron for the 0.8 micron + // process + + Ctotal = 0; + num_arbiter = 1; + while (win_entries > 4) { + win_entries = (int)ceil((double)win_entries / 4.0); + num_arbiter += win_entries; + } + // the 4-input OR logic to generate anyreq + Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def); + power.readOp.gate_leakage = + cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd; + + // The total capacity of the 4-bit priority encoder + Cpencode = + drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) + + 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 3, 
1, g_tp.cell_h_def) + + 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 4, 1, + g_tp.cell_h_def) + // precompute priority logic + 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) + + 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) + + 2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) + // enable logic + (2 * 4 + 2 * 3 + 2 * 2 + 2) * + gate_C(WSelPn + WSelPp, 10.0); // requests signal + + Ctotal += issue_width * num_arbiter * (Cor + Cpencode); + + power.readOp.dynamic = + Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * + 2; // 2 means the abitration signal need to travel round trip + power.readOp.leakage = + issue_width * num_arbiter * + (cmos_Isub_leakage( + WSelPn, WSelPp, 2, + nor) /*approximate precompute with a nor gate*/ // grant1p + + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor) // grant2p + + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor) // grant3p + + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor) * 4 // enable logic + + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv) * 2 * + 3 // for each grant there are two inverters, there are 3 grant + // sIsubnals + ) * + g_tp.peri_global.Vdd; + power.readOp.gate_leakage = + issue_width * num_arbiter * + (cmos_Ig_leakage( + WSelPn, WSelPp, 2, + nor) /*approximate precompute with a nor gate*/ // grant1p + + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor) // grant2p + + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor) // grant3p + + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor) * 4 // enable logic + + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv) * 2 * + 3 // for each grant there are two inverters, there are 3 grant + // signals + ) * + g_tp.peri_global.Vdd; } - dep_resource_conflict_check::dep_resource_conflict_check( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - int compare_bits_, - bool _is_default) - : l_ip(*configure_interface), - coredynp(dyn_p_), - compare_bits(compare_bits_), - is_default(_is_default) -{ - Wcompn = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - 
Wevalinvp = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - Wevalinvn = 100 * l_ip.F_sz_um;//this was 80.0 mcron for the 0.8 micron process - Wcomppreequ = 50 * l_ip.F_sz_um;//this was 40.0 micron for the 0.8 micron process - WNORn = 6.75 * l_ip.F_sz_um;//this was 5.4 micron for the 0.8 micron process - WNORp = 38.125 * l_ip.F_sz_um;//this was 30.5 micron for the 0.8 micron process - - local_result = init_interface(&l_ip); - - if (coredynp.core_ty==Inorder) - compare_bits += 16 + 8 + 8;//TODO: opcode bits + log(shared resources) + REG TAG BITS-->opcode comparator - else - compare_bits += 16 + 8 + 8; - - conflict_check_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; + const InputParameter *configure_interface, const CoreDynParam &dyn_p_, + int compare_bits_, bool _is_default) + : l_ip(*configure_interface), coredynp(dyn_p_), compare_bits(compare_bits_), + is_default(_is_default) { + Wcompn = 25 * l_ip.F_sz_um; // this was 20.0 micron for the 0.8 micron process + Wevalinvp = + 25 * l_ip.F_sz_um; // this was 20.0 micron for the 0.8 micron process + Wevalinvn = + 100 * l_ip.F_sz_um; // this was 80.0 mcron for the 0.8 micron process + Wcomppreequ = + 50 * l_ip.F_sz_um; // this was 40.0 micron for the 0.8 micron process + WNORn = 6.75 * l_ip.F_sz_um; // this was 5.4 micron for the 0.8 micron process + WNORp = + 38.125 * l_ip.F_sz_um; // this was 30.5 micron for the 0.8 micron process + + local_result = init_interface(&l_ip); + + if (coredynp.core_ty == Inorder) + compare_bits += 16 + 8 + 8; // TODO: opcode bits + log(shared resources) + + // REG TAG BITS-->opcode comparator + else + compare_bits += 16 + 8 + 8; + conflict_check_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; } -void 
dep_resource_conflict_check::conflict_check_power() -{ - double Ctotal; - int num_comparators; - num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. - //When decode-width ==1, no dcl logic - - Ctotal = num_comparators * compare_cap(); - //printf("%i,%s\n",XML_interface->sys.core[0].predictor.predictor_entries,XML_interface->sys.core[0].predictor.prediction_scheme); - - power.readOp.dynamic=Ctotal*/*CLOCKRATE*/g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/*AF*/; - power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); - - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - +void dep_resource_conflict_check::conflict_check_power() { + double Ctotal; + int num_comparators; + num_comparators = + 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); // 2(N*N-N) is used for source to dest comparison, + // (N*N-N) is used for dest to dest comparision. 
+ // When decode-width ==1, no dcl logic + + Ctotal = num_comparators * compare_cap(); + // printf("%i,%s\n",XML_interface->sys.core[0].predictor.predictor_entries,XML_interface->sys.core[0].predictor.prediction_scheme); + + power.readOp.dynamic = + Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /*AF*/; + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = + num_comparators * compare_bits * 2 * cmos_Ig_leakage(Wcompn, 0, 2, nmos); + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; } /* estimate comparator power consumption (this comparator is similar to the tag-match structure in a CAM */ -double dep_resource_conflict_check::compare_cap() -{ +double dep_resource_conflict_check::compare_cap() { double c1, c2; - WNORp = WNORp * compare_bits/2.0;//resize the big NOR gate at the DCL according to fan in. + WNORp = WNORp * compare_bits / + 2.0; // resize the big NOR gate at the DCL according to fan in. 
/* bottom part of comparator */ - c2 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def))+ - drain_C_(Wevalinvp,PCH,1,1, g_tp.cell_h_def) + drain_C_(Wevalinvn,NCH,1,1, g_tp.cell_h_def); + c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) + + drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def); /* top part of comparator */ - c1 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def)+ - drain_C_(Wcomppreequ,NCH,1,1, g_tp.cell_h_def)) + gate_C(WNORn + WNORp,10.0) + - drain_C_(WNORp,NCH,2,1, g_tp.cell_h_def) + compare_bits*drain_C_(WNORn,NCH,2,1, g_tp.cell_h_def); - return(c1 + c2); - + c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) + + drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(WNORn + WNORp, 10.0) + + drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + + compare_bits * drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def); + return (c1 + c2); } -void dep_resource_conflict_check::leakage_feedback(double temperature) -{ - l_ip.temp = (unsigned int)round(temperature/10.0)*10; +void dep_resource_conflict_check::leakage_feedback(double temperature) { + l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&l_ip); // init_result is dummy // This is part of conflict_check_power() - int num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. 
- power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); - - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); + int num_comparators = + 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); // 2(N*N-N) is used for source to dest comparison, + // (N*N-N) is used for dest to dest comparision. + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = + num_comparators * compare_bits * 2 * cmos_Ig_leakage(Wcompn, 0, 2, nmos); double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; } -//TODO: add inverter and transmission gate base DFF. - -DFFCell::DFFCell( - bool _is_dram, - double _WdecNANDn, - double _WdecNANDp, - double _cell_load, - const InputParameter *configure_interface) -:is_dram(_is_dram), -cell_load(_cell_load), -WdecNANDn(_WdecNANDn), -WdecNANDp(_WdecNANDp) -{//this model is based on the NAND2 based DFF. 
- l_ip=*configure_interface; -// area.set_area(730*l_ip.F_sz_um*l_ip.F_sz_um); - area.set_area(5*compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, g_tp.cell_h_def) - + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, g_tp.cell_h_def)); - - +// TODO: add inverter and transmission gate base DFF. + +DFFCell::DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, + double _cell_load, const InputParameter *configure_interface) + : is_dram(_is_dram), cell_load(_cell_load), WdecNANDn(_WdecNANDn), + WdecNANDp(_WdecNANDp) { // this model is based on the NAND2 based DFF. + l_ip = *configure_interface; + // area.set_area(730*l_ip.F_sz_um*l_ip.F_sz_um); + area.set_area( + 5 * compute_gate_area(NAND, 2, WdecNANDn, WdecNANDp, g_tp.cell_h_def) + + compute_gate_area(NAND, 2, WdecNANDn, WdecNANDn, g_tp.cell_h_def)); } - -double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) -{ +double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) { double Ctotal = 0; - //printf("WdecNANDn = %E\n", WdecNANDn); + // printf("WdecNANDn = %E\n", WdecNANDn); /* part 1: drain cap of NAND gate */ - Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); + Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); /* part 2: gate cap of NAND gates */ Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); @@ -255,458 +279,642 @@ double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) return Ctotal; } +void DFFCell::compute_DFF_cell() { + double c1, c2, c3, c4, c5, c6; + /* node 5 and node 6 are identical to node 1 in capacitance */ + c1 = c5 = c6 = fpfp_node_cap(2, 1); + c2 = fpfp_node_cap(2, 3); + c3 = fpfp_node_cap(3, 2); + c4 = fpfp_node_cap(2, 2); + + // cap-load of the clock signal in each Dff, actually the clock signal only + // connected to one NAND2 + clock_cap = 2 * 
gate_C(WdecNANDn + WdecNANDp, 0, is_dram); + e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) * + 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + ; + + /* no 1/2 for e_keep and e_clock because clock signal switches twice in one + * cycle */ + e_keep_1.readOp.dynamic += c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + e_keep_0.readOp.dynamic += c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + e_clock.readOp.dynamic += + clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + ; + + /* static power */ + e_switch.readOp.leakage += + (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5 // 5 NAND2 and 1 NAND3 in a DFF + + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; + e_switch.readOp.gate_leakage += + (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5 // 5 NAND2 and 1 NAND3 in a DFF + + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; + // printf("leakage =%E\n",cmos_Ileak(1, is_dram) ); +} + +Pipeline::Pipeline(const InputParameter *configure_interface, + const CoreDynParam &dyn_p_, enum Device_ty device_ty_, + bool _is_core_pipeline, bool _is_default) + : l_ip(*configure_interface), coredynp(dyn_p_), device_ty(device_ty_), + is_core_pipeline(_is_core_pipeline), is_default(_is_default), + num_piperegs(0.0) -void DFFCell::compute_DFF_cell() { - double c1, c2, c3, c4, c5, c6; - /* node 5 and node 6 are identical to node 1 in capacitance */ - c1 = c5 = c6 = fpfp_node_cap(2, 1); - c2 = fpfp_node_cap(2, 3); - c3 = fpfp_node_cap(3, 2); - c4 = fpfp_node_cap(2, 2); - - //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 - clock_cap= 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); - e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2*cell_load)*0.5*g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - - /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ - e_keep_1.readOp.dynamic += c3 * 
g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_keep_0.readOp.dynamic += c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_clock.readOp.dynamic += clock_cap* g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - - /* static power */ - e_switch.readOp.leakage += (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - e_switch.readOp.gate_leakage += (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - //printf("leakage =%E\n",cmos_Ileak(1, is_dram) ); + local_result = init_interface(&l_ip); + if (!coredynp.Embedded) + process_ind = true; + else + process_ind = false; + WNANDn = + (process_ind) + ? 25 * l_ip.F_sz_um + : g_tp.min_w_nmos_; // this was 20 micron for the 0.8 micron process + WNANDp = (process_ind) + ? 37.5 * l_ip.F_sz_um + : g_tp.min_w_nmos_ * + pmos_to_nmos_sz_ratio(); // this was 30 micron for the 0.8 + // micron process + load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); + compute(); } -Pipeline::Pipeline( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - enum Device_ty device_ty_, - bool _is_core_pipeline, - bool _is_default) -: l_ip(*configure_interface), - coredynp(dyn_p_), - device_ty(device_ty_), - is_core_pipeline(_is_core_pipeline), - is_default(_is_default), - num_piperegs(0.0) - - { - local_result = init_interface(&l_ip); - if (!coredynp.Embedded) - process_ind = true; - else - process_ind = false; - WNANDn = (process_ind)? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;//this was 20 micron for the 0.8 micron process - WNANDp = (process_ind)? 
37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_*pmos_to_nmos_sz_ratio();//this was 30 micron for the 0.8 micron process - load_per_pipeline_stage = 2*gate_C(WNANDn + WNANDp, 0, false); - compute(); +void Pipeline::compute() { + compute_stage_vector(); + DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); + pipe_reg.compute_DFF_cell(); + + double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; + //******************pipeline power: currently, we average all the + // possibilities of the states of DFFs in the pipeline. A better way to do it + // is to consider the harming distance of two consecutive signals, However + // McPAT does not have plan to do this in near future as it focuses on worst + // case power. + double pipe_reg_power = + num_piperegs * + (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic + + pipe_reg.e_keep_1.readOp.dynamic) / + 3 + + clock_power_pipereg; + double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; + double pipe_reg_gate_leakage = + num_piperegs * pipe_reg.e_switch.readOp.gate_leakage; + power.readOp.dynamic += pipe_reg_power; + power.readOp.leakage += pipe_reg_leakage; + power.readOp.gate_leakage += pipe_reg_gate_leakage; + area.set_area(num_piperegs * pipe_reg.area.get_area()); + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; -} + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; -void Pipeline::compute() -{ - compute_stage_vector(); - DFFCell pipe_reg(false, WNANDn,WNANDp, load_per_pipeline_stage, &l_ip); - pipe_reg.compute_DFF_cell(); - - double clock_power_pipereg = num_piperegs * 
pipe_reg.e_clock.readOp.dynamic; - //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider - //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. - double pipe_reg_power = num_piperegs * (pipe_reg.e_switch.readOp.dynamic+pipe_reg.e_keep_0.readOp.dynamic+pipe_reg.e_keep_1.readOp.dynamic)/3+clock_power_pipereg; - double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; - double pipe_reg_gate_leakage = num_piperegs * pipe_reg.e_switch.readOp.gate_leakage; - power.readOp.dynamic +=pipe_reg_power; - power.readOp.leakage +=pipe_reg_leakage; - power.readOp.gate_leakage +=pipe_reg_gate_leakage; - area.set_area(num_piperegs * pipe_reg.area.get_area()); - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - - - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - double macro_layout_overhead = g_tp.macro_layout_overhead; - if (!coredynp.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + double macro_layout_overhead = g_tp.macro_layout_overhead; + if (!coredynp.Embedded) + area.set_area(area.get_area() * macro_layout_overhead); } -void Pipeline::compute_stage_vector() -{ - double num_stages, 
tot_stage_vector, per_stage_vector; - int opcode_length = coredynp.x86? coredynp.micro_opcode_length:coredynp.opcode_length; - //Hthread = thread_clock_gated? 1:num_thread; +void Pipeline::compute_stage_vector() { + double num_stages, tot_stage_vector, per_stage_vector; + int opcode_length = + coredynp.x86 ? coredynp.micro_opcode_length : coredynp.opcode_length; + // Hthread = thread_clock_gated? 1:num_thread; + + if (!is_core_pipeline) { + num_piperegs = l_ip.pipeline_stages * + l_ip.per_stage_vector; // The number of pipeline stages are + // calculated based on the achievable + // throughput and required throughput + } else { + if (coredynp.core_ty == Inorder) { + /* assume 6 pipe stages and try to estimate bits per pipe stage */ + /* pipe stage 0/IF */ + num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads; + /* pipe stage IF/ID */ + num_piperegs += coredynp.fetchW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads; + /* pipe stage IF/ThreadSEL */ + if (coredynp.multithreaded) + num_piperegs += coredynp.num_hthreads * + coredynp.perThreadState; // 8 bit thread states + /* pipe stage ID/EXE */ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width + + pow(2.0, opcode_length) + 2 * coredynp.int_data_width) * + coredynp.num_hthreads; + /* pipe stage EXE/MEM */ + num_piperegs += + coredynp.issueW * + (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + + 8 * 2 * coredynp.int_data_width /*+2*powers (2,reg_length)*/); + /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal + * for the opcode*/ + num_piperegs += + coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + + 8 * 2 * coredynp.int_data_width /*+2*powers (2,reg_length)*/); + // /* pipe stage 5/6 */ + // num_piperegs += issueWidth*(data_width + powers + //(2,opcode_length)/*+2*powers (2,reg_length)*/); + // /* pipe stage 6/7 */ + // num_piperegs += issueWidth*(data_width + powers + 
//(2,opcode_length)/*+2*powers (2,reg_length)*/); + // /* pipe stage 7/8 */ + // num_piperegs += issueWidth*(data_width + powers + //(2,opcode_length)/**2*powers (2,reg_length)*/); + // /* assume 50% extra in control signals (rule of thumb) + //*/ + num_stages = 6; + + } else { + /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ + /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM + */ + + /* pipe stage 0/1F*/ + num_piperegs += + coredynp.pc_width * 2 * coredynp.num_hthreads; // PC and Next PC + /* pipe stage IF/ID */ + num_piperegs += + coredynp.fetchW * (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads; // PC is used to feed branch predictor in ID + /* pipe stage 1D/Renaming*/ + num_piperegs += + coredynp.decodeW * (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads; // PC is for branch exe in later stage. + /* pipe stage Renaming/wire_drive */ + num_piperegs += + coredynp.decodeW * (coredynp.instruction_length + coredynp.pc_width); + /* pipe stage Renaming/IssueQ */ + num_piperegs += coredynp.issueW * + (coredynp.instruction_length + coredynp.pc_width + + 3 * coredynp.phy_ireg_width) * + coredynp.num_hthreads; // 3*coredynp.phy_ireg_width means + // 2 sources and 1 dest + /* pipe stage IssueQ/Dispatch */ + num_piperegs += coredynp.issueW * (coredynp.instruction_length + + 3 * coredynp.phy_ireg_width); + /* pipe stage Dispatch/EXE */ + + num_piperegs += coredynp.issueW * + (3 * coredynp.phy_ireg_width + coredynp.pc_width + + pow(2.0, opcode_length) /*+2*powers (2,reg_length)*/); + /* 2^opcode_length means the total decoded signal for the opcode*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + + pow(2.0, opcode_length) /*+2*powers (2,reg_length)*/); + /*2 source operands in EXE; Assume 2EXE stages* since we do not really + * distinguish OP*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + + pow(2.0, opcode_length) /*+2*powers 
(2,reg_length)*/); + /* pipe stage EXE/MEM, data need to be read/write, address*/ + num_piperegs += + coredynp.issueW * + (coredynp.int_data_width + coredynp.v_address_width + + pow(2.0, + opcode_length) /*+2*powers (2,reg_length)*/); // memory Opcode + // still need to be + // passed + /* pipe stage MEM/WB; result data, writeback regs */ + num_piperegs += + coredynp.issueW * (coredynp.int_data_width + coredynp.phy_ireg_width /* powers (2,opcode_length) + (2,opcode_length)+2*powers (2,reg_length)*/); + /* pipe stage WB/CM ; result data, regs need to be updated, address for + * resolve memory ops in ROB's top*/ + num_piperegs += + coredynp.commitW * + (coredynp.int_data_width + coredynp.v_address_width + coredynp.phy_ireg_width /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) * + coredynp.num_hthreads; + // if (multithreaded) + // { + // + // } + num_stages = 12; + } - if (!is_core_pipeline) - { - num_piperegs=l_ip.pipeline_stages*l_ip.per_stage_vector;//The number of pipeline stages are calculated based on the achievable throughput and required throughput - } - else - { - if (coredynp.core_ty==Inorder) - { - /* assume 6 pipe stages and try to estimate bits per pipe stage */ - /* pipe stage 0/IF */ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads; - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads; - /* pipe stage IF/ThreadSEL */ - if (coredynp.multithreaded) num_piperegs += coredynp.num_hthreads*coredynp.perThreadState; //8 bit thread states - /* pipe stage ID/EXE */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width + pow(2.0,opcode_length)+ 2*coredynp.int_data_width)*coredynp.num_hthreads; - /* pipe stage EXE/MEM */ - num_piperegs += coredynp.issueW*(3 * coredynp.arch_ireg_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); - /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal 
for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); -// /* pipe stage 5/6 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 6/7 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 7/8 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/**2*powers (2,reg_length)*/); -// /* assume 50% extra in control signals (rule of thumb) */ - num_stages=6; - - } - else - { - /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ - /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ - - /* pipe stage 0/1F*/ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads ;//PC and Next PC - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is used to feed branch predictor in ID - /* pipe stage 1D/Renaming*/ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is for branch exe in later stage. 
- /* pipe stage Renaming/wire_drive */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width); - /* pipe stage Renaming/IssueQ */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + coredynp.pc_width + 3*coredynp.phy_ireg_width)*coredynp.num_hthreads;//3*coredynp.phy_ireg_width means 2 sources and 1 dest - /* pipe stage IssueQ/Dispatch */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + 3 * coredynp.phy_ireg_width); - /* pipe stage Dispatch/EXE */ - - num_piperegs += coredynp.issueW*(3 * coredynp.phy_ireg_width + coredynp.pc_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* 2^opcode_length means the total decoded signal for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* pipe stage EXE/MEM, data need to be read/write, address*/ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.v_address_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);//memory Opcode still need to be passed - /* pipe stage MEM/WB; result data, writeback regs */ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.phy_ireg_width /* powers (2,opcode_length) + (2,opcode_length)+2*powers (2,reg_length)*/); - /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ - num_piperegs += coredynp.commitW*(coredynp.int_data_width + coredynp.v_address_width + coredynp.phy_ireg_width/*+ powers (2,opcode_length)*2*powers (2,reg_length)*/)*coredynp.num_hthreads; -// if (multithreaded) -// { -// -// } - num_stages=12; - - } - - /* assume 50% extra in control registers and interrupt registers (rule of thumb) */ - num_piperegs = num_piperegs * 1.5; - 
tot_stage_vector=num_piperegs; - per_stage_vector=tot_stage_vector/num_stages; - - if (coredynp.core_ty==Inorder) - { - if (coredynp.pipeline_stages>6) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; - } - else//OOO - { - if (coredynp.pipeline_stages>12) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; - } - } + /* assume 50% extra in control registers and interrupt registers (rule of + * thumb) */ + num_piperegs = num_piperegs * 1.5; + tot_stage_vector = num_piperegs; + per_stage_vector = tot_stage_vector / num_stages; + if (coredynp.core_ty == Inorder) { + if (coredynp.pipeline_stages > 6) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; + } else // OOO + { + if (coredynp.pipeline_stages > 12) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; + } + } } -FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - fu_type(fu_type_) -{ - double area_t;//, leakage, gate_leakage; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); - if (XML->sys.Embedded) - { - if (fu_type == FPU) - { - num_fu=coredynp.num_fpus; - //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number - //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60% - if (g_ip->F_sz_nm>90) - area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = 
area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles. -// base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) -// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ) - //FPU power from Sandia's processor sizing tech report - FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; - area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; -// base_energy = coredynp.core_ty==Inorder? 
0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) -// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) - FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU - - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; - area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; -// base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) -// base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch - FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { - cout<<"Unknown Functional Unit Type"<F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 - if (g_ip->F_sz_nm>90) - area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 
5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles. - base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) - FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; - area_t = 280*260*2*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 
0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) - FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU - - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; - area_t = 280*260*2*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch - FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { - cout<<"Unknown Functional Unit Type"<sys.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); +FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + enum FU_type fu_type_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), fu_type(fu_type_) { + double area_t; //, leakage, gate_leakage; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + clockRate = coredynp.clockRate; + executionTime = 
coredynp.executionTime; + + // XML_interface=_XML_interface; + uca_org_t result2; + result2 = init_interface(&interface_ip); + if (XML->sys.Embedded) { + if (fu_type == FPU) { + num_fu = coredynp.num_fpus; + // area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is + // um^2 + area_t = 4.47 * 1e6 * + (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / + 90.0); // this is um^2 The base number + // 4.47 contains both VFP and NEON processing unit, VFP is about 40% and + // NEON is about 60% + if (g_ip->F_sz_nm > 90) + area_t = 4.47 * 1e6 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage( + 5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction + // in FPU usually it can have up to 20 cycles. + // base_energy = coredynp.core_ty==Inorder? 
0: + // 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + // //This is per Hz energy(nJ) + // FPU power from Sandia's processor sizing tech report + FU_height = (18667 * num_fu) * interface_ip.F_sz_um; // FPU from Sun's + // data + } else if (fu_type == ALU) { + num_fu = coredynp.num_alus; + area_t = + 280 * 260 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd / 2; + // base_energy = coredynp.core_ty==Inorder? 
+ // 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ) + FU_height = (6222 * num_fu) * interface_ip.F_sz_um; // integer ALU + + } else if (fu_type == MUL) { + num_fu = coredynp.num_muls; + area_t = + 280 * 260 * 3 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd / 2; + // base_energy = coredynp.core_ty==Inorder? 
+ // 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 * 2 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ), coefficient based on Wattch + FU_height = + (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data + } else { + cout << "Unknown Functional Unit Type" << endl; + exit(0); + } + per_access_energy *= 0.5; // According to ARM data embedded processor has + // much lower per acc energy + } else { + if (fu_type == FPU) { + num_fu = coredynp.num_fpus; + // area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is + // um^2 + area_t = 8.47 * 1e6 * + (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / 90.0); // this is um^2 + if (g_ip->F_sz_nm > 90) + area_t = 8.47 * 1e6 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage( + 5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction + // in FPU usually it can have up to 20 cycles. + base_energy = coredynp.core_ty == Inorder + ? 
0 + : 89e-3 * 3; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 * 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + // //This is per op energy(nJ) + FU_height = (38667 * num_fu) * interface_ip.F_sz_um; // FPU from Sun's + // data + } else if (fu_type == ALU) { + num_fu = coredynp.num_alus; + area_t = + 280 * 260 * 2 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd / 2; + base_energy = coredynp.core_ty == Inorder + ? 
0 + : 89e-3; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ) + FU_height = (6222 * num_fu) * interface_ip.F_sz_um; // integer ALU + + } else if (fu_type == MUL) { + num_fu = coredynp.num_muls; + area_t = + 280 * 260 * 2 * 3 * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd / 2; + base_energy = coredynp.core_ty == Inorder + ? 0 + : 89e-3 * 2; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 * 2 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ), coefficient based on Wattch + FU_height = + (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data + } else { + cout << "Unknown Functional Unit Type" << endl; + exit(0); + } + } + // IEXEU, simple ALU and FPU + // double C_ALU, C_EXEU, C_FPU; //Lum Equivalent capacitance of IEXEU and + // FPU. Based on Intel and Sun 90nm process fabracation. 
+ // + // C_ALU = 0.025e-9;//F + // C_EXEU = 0.05e-9; //F + // C_FPU = 0.35e-9;//F + area.set_area(area_t * num_fu); + leakage *= num_fu; + gate_leakage *= num_fu; + double macro_layout_overhead = g_tp.macro_layout_overhead; + // if (!XML->sys.Embedded) + area.set_area(area.get_area() * macro_layout_overhead); } -void FunctionalUnit::computeEnergy(bool is_tdp) -{ - double pppm_t[4] = {1,1,1,1}; - double FU_duty_cycle; - if (is_tdp) - { - - - set_pppm(pppm_t, 2, 2, 2, 2);//2 means two source operands needs to be passed for each int instruction. - if (fu_type == FPU) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.FPU_duty_cycle; - } - else if (fu_type == ALU) - { - stats_t.readAc.access = 1*num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.ALU_duty_cycle; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.MUL_duty_cycle; - } - - //power.readOp.dynamic = base_energy/clockRate + energy*stats_t.readAc.access; - power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy/clockRate; - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation*FU_duty_cycle; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - power.readOp.leakage = leakage; - power.readOp.gate_leakage = gate_leakage; - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - - - } - else - { - if (fu_type == FPU) - { - stats_t.readAc.access = XML->sys.core[ithCore].fpu_accesses; - rtp_stats = stats_t; - } - else if (fu_type == ALU) - { - 
stats_t.readAc.access = XML->sys.core[ithCore].ialu_accesses; - rtp_stats = stats_t; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = XML->sys.core[ithCore].mul_accesses; - rtp_stats = stats_t; - } - - //rt_power.readOp.dynamic = base_energy*executionTime + energy*stats_t.readAc.access; - rt_power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy*executionTime; - double sckRation = g_tp.sckt_co_eff; - rt_power.readOp.dynamic *= sckRation; - rt_power.writeOp.dynamic *= sckRation; - rt_power.searchOp.dynamic *= sckRation; - - } +void FunctionalUnit::computeEnergy(bool is_tdp) { + double pppm_t[4] = {1, 1, 1, 1}; + double FU_duty_cycle; + if (is_tdp) { + + set_pppm(pppm_t, 2, 2, 2, 2); // 2 means two source operands needs to be + // passed for each int instruction. + if (fu_type == FPU) { + stats_t.readAc.access = num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.FPU_duty_cycle; + } else if (fu_type == ALU) { + stats_t.readAc.access = 1 * num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.ALU_duty_cycle; + } else if (fu_type == MUL) { + stats_t.readAc.access = num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.MUL_duty_cycle; + } + // power.readOp.dynamic = base_energy/clockRate + + // energy*stats_t.readAc.access; + power.readOp.dynamic = + per_access_energy * stats_t.readAc.access + base_energy / clockRate; + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation * FU_duty_cycle; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + power.readOp.leakage = leakage; + power.readOp.gate_leakage = gate_leakage; + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + 
power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; + + } else { + if (fu_type == FPU) { + stats_t.readAc.access = XML->sys.core[ithCore].fpu_accesses; + rtp_stats = stats_t; + } else if (fu_type == ALU) { + stats_t.readAc.access = XML->sys.core[ithCore].ialu_accesses; + rtp_stats = stats_t; + } else if (fu_type == MUL) { + stats_t.readAc.access = XML->sys.core[ithCore].mul_accesses; + rtp_stats = stats_t; + } + // rt_power.readOp.dynamic = base_energy*executionTime + + // energy*stats_t.readAc.access; + rt_power.readOp.dynamic = + per_access_energy * stats_t.readAc.access + base_energy * executionTime; + double sckRation = g_tp.sckt_co_eff; + rt_power.readOp.dynamic *= sckRation; + rt_power.writeOp.dynamic *= sckRation; + rt_power.searchOp.dynamic *= sckRation; + } } -void FunctionalUnit::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - -// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - if (fu_type == FPU) - { - cout << indent_str << "Floating Point Units (FPUs) (Count: "<< coredynp.num_fpus <<" ):" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + // cout << indent_str_next << "Results Broadcast Bus Area = " << + // bypass->area.get_area() *1e-6 << " mm^2" << endl; + if (is_tdp) { + if (fu_type == FPU) { + cout << indent_str + << "Floating Point Units (FPUs) (Count: " << coredynp.num_fpus + << " ):" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << endl; + // cout << indent_str_next << "Subthreshold Leakage + //= " << power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + } else if (fu_type == ALU) { + cout << indent_str << "Integer ALUs (Count: " << coredynp.num_alus + << " ):" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << endl; + // cout << indent_str_next << "Subthreshold Leakage + //= " << power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + } else if (fu_type == MUL) { + cout << indent_str + << "Complex ALUs (Mul/Div) (Count: " << coredynp.num_muls + << " ):" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << endl; + // cout << indent_str_next << "Subthreshold Leakage + //= " << power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + } + } else { + } } -void FunctionalUnit::leakage_feedback(double temperature) -{ +void FunctionalUnit::leakage_feedback(double temperature) { // Update the temperature and initialize the global interfaces. - interface_ip.temp = (unsigned int)round(temperature/10.0)*10; + interface_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&interface_ip); // init_result is dummy @@ -714,337 +922,370 @@ void FunctionalUnit::leakage_feedback(double temperature) double area_t, leakage, gate_leakage; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - if (fu_type == FPU) - { - area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number - if (g_ip->F_sz_nm>90) - area_t = 4.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - } - else if (fu_type == ALU) - { - area_t = 280*260*2*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, 
inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - } - else if (fu_type == MUL) - { - area_t = 280*260*2*3*num_fu*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - } - else - { - cout<<"Unknown Functional Unit Type"<F_sz_nm * g_ip->F_sz_nm / 90.0 / + 90.0); // this is um^2 The base number + if (g_ip->F_sz_nm > 90) + area_t = + 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + } else if (fu_type == ALU) { + area_t = 280 * 260 * 2 * num_fu * + g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.peri_global.Vdd / 2; + } else if (fu_type == MUL) { + area_t = 280 * 260 * 2 * 3 * num_fu * + g_tp.scaling_factor.logic_scaling_co_eff; // this 
is um^2 ALU + MUl + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.peri_global.Vdd / 2; + } else { + cout << "Unknown Functional Unit Type" << endl; exit(1); } - power.readOp.leakage = leakage*num_fu; - power.readOp.gate_leakage = gate_leakage*num_fu; - power.readOp.longer_channel_leakage = longer_channel_device_reduction(Core_device, coredynp.core_ty)*power.readOp.leakage; + power.readOp.leakage = leakage * num_fu; + power.readOp.gate_leakage = gate_leakage * num_fu; + power.readOp.longer_channel_leakage = + longer_channel_device_reduction(Core_device, coredynp.core_ty) * + power.readOp.leakage; double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; } -UndiffCore::UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_, bool embedded_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - core_ty(coredynp.core_ty), - embedded(XML->sys.Embedded), - pipeline_stage(coredynp.pipeline_stages), - num_hthreads(coredynp.num_hthreads), - issue_width(coredynp.issueW), - exist(exist_) +UndiffCore::UndiffCore(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_, bool embedded_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), core_ty(coredynp.core_ty), embedded(XML->sys.Embedded), + pipeline_stage(coredynp.pipeline_stages), + num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW), 
+ exist(exist_) // is_default(_is_default) { - if (!exist) return; - double undifferentiated_core=0; - double core_tx_density=0; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double undifferentiated_core_coe; - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); - - //Compute undifferentiated core area at 90nm. - if (embedded==false) - { - //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements - if (core_ty==OOO) - { - //undifferentiated_core = (0.0764*pipeline_stage*pipeline_stage -2.3685*pipeline_stage + 10.405);//OOO - undifferentiated_core = (3.57*log(pipeline_stage)-1.2643)>0?(3.57*log(pipeline_stage)-1.2643):0; - } - else if (core_ty==Inorder) - { - //undifferentiated_core = (0.1238*pipeline_stage + 7.2572)*0.9;//inorder - undifferentiated_core = (-2.19*log(pipeline_stage)+6.55)>0?(-2.19*log(pipeline_stage)+6.55):0; - } - else - { - cout<<"invalid core type"<sys.opt_clockrate) - undifferentiated_core_coe = 0.05; - else - undifferentiated_core_coe = 0; - undifferentiated_core = (0.4109* pipeline_stage - 0.776)*undifferentiated_core_coe; - undifferentiated_core *= (1+ logtwo(num_hthreads)* 0.0426); - } - - undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff*1e6;//change from mm^2 to um^2 - core_tx_density = g_tp.scaling_factor.core_tx_density; - //undifferentiated_core = 3*1e6; - //undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd; - - double long_channel_device_reduction = 
longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - - - area.set_area(undifferentiated_core); - - scktRatio = g_tp.sckt_co_eff; - power.readOp.dynamic *= scktRatio; - power.writeOp.dynamic *= scktRatio; - power.searchOp.dynamic *= scktRatio; - macro_PR_overhead = g_tp.macro_layout_overhead; - area.set_area(area.get_area()*macro_PR_overhead); - - - -// double vt=g_tp.peri_global.Vth; -// double velocity_index=1.1; -// double c_in=gate_C(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r , 0.0, false); -// double c_out= drain_C_(g_tp.min_w_nmos_, NCH, 2, 1, g_tp.cell_h_def, false) + drain_C_(g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, PCH, 1, 1, g_tp.cell_h_def, false) + c_in; -// double w_nmos=g_tp.min_w_nmos_; -// double w_pmos=g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; -// double i_on_n=1.0; -// double i_on_p=1.0; -// double i_on_n_in=1.0; -// double i_on_p_in=1; -// double vdd=g_tp.peri_global.Vdd; - -// power.readOp.sc=shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd); -// power.readOp.dynamic=c_out*vdd*vdd/2; - -// cout< 0 + ? (3.57 * log(pipeline_stage) - 1.2643) + : 0; + } else if (core_ty == Inorder) { + // undifferentiated_core = (0.1238*pipeline_stage + 7.2572)*0.9;//inorder + undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 + ? 
(-2.19 * log(pipeline_stage) + 6.55) + : 0; + } else { + cout << "invalid core type" << endl; + exit(0); + } + undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716); + } else { + // Based on the results in paper "parametrized processor models" Sandia Labs + if (XML->sys.opt_clockrate) + undifferentiated_core_coe = 0.05; + else + undifferentiated_core_coe = 0; + undifferentiated_core = + (0.4109 * pipeline_stage - 0.776) * undifferentiated_core_coe; + undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426); + } + undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff * + 1e6; // change from mm^2 to um^2 + core_tx_density = g_tp.scaling_factor.core_tx_density; + // undifferentiated_core = 3*1e6; + // undifferentiated_core *= + // g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; + power.readOp.leakage = + undifferentiated_core * + (core_tx_density)*cmos_Isub_leakage( + 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd; // unit W + power.readOp.gate_leakage = + undifferentiated_core * + (core_tx_density)*cmos_Ig_leakage( + 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + inv) * + g_tp.peri_global.Vdd; + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; + + area.set_area(undifferentiated_core); + + scktRatio = g_tp.sckt_co_eff; + power.readOp.dynamic *= scktRatio; + power.writeOp.dynamic *= scktRatio; + power.searchOp.dynamic *= scktRatio; + macro_PR_overhead = g_tp.macro_layout_overhead; + area.set_area(area.get_area() * 
macro_PR_overhead); + + // double vt=g_tp.peri_global.Vth; + // double velocity_index=1.1; + // double c_in=gate_C(g_tp.min_w_nmos_, + // g_tp.min_w_nmos_*pmos_to_nmos_sizing_r , 0.0, false); double + // c_out= drain_C_(g_tp.min_w_nmos_, NCH, 2, 1, g_tp.cell_h_def, false) + + // drain_C_(g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, PCH, 1, 1, + // g_tp.cell_h_def, false) + c_in; double w_nmos=g_tp.min_w_nmos_; + // double w_pmos=g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; double + // i_on_n=1.0; double + // i_on_p=1.0; double i_on_n_in=1.0; double i_on_p_in=1; + // double vdd=g_tp.peri_global.Vdd; + + // power.readOp.sc=shortcircuit_simple(vt, velocity_index, c_in, + // c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd); + // power.readOp.dynamic=c_out*vdd*vdd/2; + + // cout<sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << indent_str << "UndiffCore:" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - //cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "UndiffCore:" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << endl; + // cout << indent_str_next << "Subthreshold Leakage = " << + // power.readOp.leakage <<" W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + // cout << indent_str_next << "Runtime Dynamic = " << + // rt_power.readOp.dynamic/executionTime << " W" << endl; + cout << endl; + } else { + cout << indent_str << "UndiffCore:" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + // cout << indent_str_next << "Runtime Dynamic = " << + // rt_power.readOp.dynamic/executionTime << " W" << endl; + cout << endl; + } } -inst_decoder::inst_decoder( - bool _is_default, - const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_, - enum Core_type core_ty_) -:is_default(_is_default), - opcode_length(opcode_length_), - num_decoders(num_decoders_), - x86(x86_), - device_ty(device_ty_), - core_ty(core_ty_) - { - /* - * Instruction decoder is different from n to 2^n decoders - * that are commonly used in row decoders in memory arrays. - * The RISC instruction decoder is typically a very simple device. - * We can decode an instruction by simply - * separating the machine word into small parts using wire slices - * The RISC instruction decoder can be approximate by the n to 2^n decoders, - * although this approximation usually underestimate power since each decoded - * instruction normally has more than 1 active signal. - * - * However, decoding a CISC instruction word is much more difficult - * than the RISC case. A CISC decoder is typically set up as a state machine. 
- * The machine reads the opcode field to determine - * what type of instruction it is, - * and where the other data values are. - * The instruction word is read in piece by piece, - * and decisions are made at each stage as to - * how the remainder of the instruction word will be read. - * (sequencer and ROM are usually needed) - * An x86 decoder can be even more complex since - * it involve both decoding instructions into u-ops and - * merge u-ops when doing micro-ops fusion. - */ - bool is_dram=false; - double pmos_to_nmos_sizing_r; - double load_nmos_width, load_pmos_width; - double C_driver_load, R_wire_load; - Area cell; - - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - cell.h =g_tp.cell_h_def; - cell.w =g_tp.cell_h_def; - - num_decoder_segments = (int)ceil(opcode_length/18.0); - if (opcode_length > 18) opcode_length = 18; - num_decoded_signals= (int)pow(2.0,opcode_length); - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - load_nmos_width=g_tp.max_w_nmos_ /2; - load_pmos_width= g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; - C_driver_load = 1024*gate_C(load_nmos_width + load_pmos_width, 0, is_dram); //TODO: this number 1024 needs to be revisited - R_wire_load = 3000*l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; - - final_dec = new Decoder( - num_decoded_signals, - false, - C_driver_load, - R_wire_load, - false/*is_fa*/, - false/*is_dram*/, - false/*wl_tr*/, //to use peri device - cell); - - PredecBlk * predec_blk1 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - true); - PredecBlk * predec_blk2 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - false); - - PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); - PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, 
false); - - pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); - - double area_decoder = final_dec->area.get_area() * num_decoded_signals * num_decoder_segments*num_decoders; - //double w_decoder = area_decoder / area.get_h(); - double area_pre_dec = (predec_blk_drv1->area.get_area() + - predec_blk_drv2->area.get_area() + - predec_blk1->area.get_area() + - predec_blk2->area.get_area())* - num_decoder_segments*num_decoders; - area.set_area(area.get_area()+ area_decoder + area_pre_dec); - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); - - inst_decoder_delay_power(); - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; +inst_decoder::inst_decoder(bool _is_default, + const InputParameter *configure_interface, + int opcode_length_, int num_decoders_, bool x86_, + enum Device_ty device_ty_, enum Core_type core_ty_) + : is_default(_is_default), opcode_length(opcode_length_), + num_decoders(num_decoders_), x86(x86_), device_ty(device_ty_), + core_ty(core_ty_) { + /* + * Instruction decoder is different from n to 2^n decoders + * that are commonly used in row decoders in memory arrays. + * The RISC instruction decoder is typically a very simple device. 
+ * We can decode an instruction by simply + * separating the machine word into small parts using wire slices + * The RISC instruction decoder can be approximate by the n to 2^n decoders, + * although this approximation usually underestimate power since each decoded + * instruction normally has more than 1 active signal. + * + * However, decoding a CISC instruction word is much more difficult + * than the RISC case. A CISC decoder is typically set up as a state machine. + * The machine reads the opcode field to determine + * what type of instruction it is, + * and where the other data values are. + * The instruction word is read in piece by piece, + * and decisions are made at each stage as to + * how the remainder of the instruction word will be read. + * (sequencer and ROM are usually needed) + * An x86 decoder can be even more complex since + * it involve both decoding instructions into u-ops and + * merge u-ops when doing micro-ops fusion. + */ + bool is_dram = false; + double pmos_to_nmos_sizing_r; + double load_nmos_width, load_pmos_width; + double C_driver_load, R_wire_load; + Area cell; + + l_ip = *configure_interface; + local_result = init_interface(&l_ip); + cell.h = g_tp.cell_h_def; + cell.w = g_tp.cell_h_def; + + num_decoder_segments = (int)ceil(opcode_length / 18.0); + if (opcode_length > 18) + opcode_length = 18; + num_decoded_signals = (int)pow(2.0, opcode_length); + pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + load_nmos_width = g_tp.max_w_nmos_ / 2; + load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; + C_driver_load = + 1024 * gate_C(load_nmos_width + load_pmos_width, 0, + is_dram); // TODO: this number 1024 needs to be revisited + R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; + + final_dec = new Decoder(num_decoded_signals, false, C_driver_load, + R_wire_load, false /*is_fa*/, false /*is_dram*/, + false /*wl_tr*/, // to use peri device + cell); + + PredecBlk *predec_blk1 = + new PredecBlk(num_decoded_signals, 
final_dec, + 0, // Assuming predec and dec are back to back + 0, + 1, // Each Predec only drives one final dec + false /*is_dram*/, true); + PredecBlk *predec_blk2 = + new PredecBlk(num_decoded_signals, final_dec, + 0, // Assuming predec and dec are back to back + 0, + 1, // Each Predec only drives one final dec + false /*is_dram*/, false); + + PredecBlkDrv *predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); + PredecBlkDrv *predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); + + pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); + + double area_decoder = final_dec->area.get_area() * num_decoded_signals * + num_decoder_segments * num_decoders; + // double w_decoder = area_decoder / area.get_h(); + double area_pre_dec = + (predec_blk_drv1->area.get_area() + predec_blk_drv2->area.get_area() + + predec_blk1->area.get_area() + predec_blk2->area.get_area()) * + num_decoder_segments * num_decoders; + area.set_area(area.get_area() + area_decoder + area_pre_dec); + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + area.set_area(area.get_area() * macro_layout_overhead * chip_PR_overhead); + + inst_decoder_delay_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; } -void inst_decoder::inst_decoder_delay_power() -{ - - double dec_outrisetime; - double inrisetime=0, outrisetime; - double pppm_t[4] = {1,1,1,1}; - double squencer_passes = 
x86?2:1; - - outrisetime = pre_dec->compute_delays(inrisetime); - dec_outrisetime = final_dec->compute_delays(outrisetime); - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); - power = power + pre_dec->power*pppm_t; - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals, - num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); - power = power + final_dec->power*pppm_t; +void inst_decoder::inst_decoder_delay_power() { + + double dec_outrisetime; + double inrisetime = 0, outrisetime; + double pppm_t[4] = {1, 1, 1, 1}; + double squencer_passes = x86 ? 2 : 1; + + outrisetime = pre_dec->compute_delays(inrisetime); + dec_outrisetime = final_dec->compute_delays(outrisetime); + set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments, + squencer_passes * num_decoder_segments, num_decoder_segments); + power = power + pre_dec->power * pppm_t; + set_pppm(pppm_t, squencer_passes * num_decoder_segments, + num_decoder_segments * num_decoded_signals, + num_decoder_segments * num_decoded_signals, + squencer_passes * num_decoder_segments); + power = power + final_dec->power * pppm_t; } -void inst_decoder::leakage_feedback(double temperature) -{ - l_ip.temp = (unsigned int)round(temperature/10.0)*10; +void inst_decoder::leakage_feedback(double temperature) { + l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&l_ip); // init_result is dummy final_dec->leakage_feedback(temperature); pre_dec->leakage_feedback(temperature); - double pppm_t[4] = {1,1,1,1}; - double squencer_passes = x86?2:1; + double pppm_t[4] = {1, 1, 1, 1}; + double squencer_passes = x86 ? 
2 : 1; - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); - power = pre_dec->power*pppm_t; + set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments, + squencer_passes * num_decoder_segments, num_decoder_segments); + power = pre_dec->power * pppm_t; - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals,num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); - power = power + final_dec->power*pppm_t; + set_pppm(pppm_t, squencer_passes * num_decoder_segments, + num_decoder_segments * num_decoded_signals, + num_decoder_segments * num_decoded_signals, + squencer_passes * num_decoder_segments); + power = power + final_dec->power * pppm_t; double sckRation = g_tp.sckt_co_eff; @@ -1052,25 +1293,25 @@ void inst_decoder::leakage_feedback(double temperature) power.writeOp.dynamic *= sckRation; power.searchOp.dynamic *= sckRation; - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage*pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; - - + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; } -inst_decoder::~inst_decoder() -{ - local_result.cleanup(); +inst_decoder::~inst_decoder() { + local_result.cleanup(); - delete final_dec; + delete 
final_dec; - delete pre_dec->blk1; - delete pre_dec->blk2; - delete pre_dec->drv1; - delete pre_dec->drv2; - delete pre_dec; + delete pre_dec->blk1; + delete pre_dec->blk2; + delete pre_dec->drv1; + delete pre_dec->drv2; + delete pre_dec; } diff --git a/logic.h b/logic.h index 49640d1..ee45ec7 100644 --- a/logic.h +++ b/logic.h @@ -31,203 +31,202 @@ #ifndef LOGIC_H_ #define LOGIC_H_ -#include "const.h" -#include "component.h" -#include "basic_components.h" +#include "XML_Parse.h" +#include "arch_const.h" #include "basic_circuit.h" +#include "basic_components.h" #include "cacti_interface.h" +#include "component.h" +#include "const.h" #include "decoder.h" #include "parameter.h" #include "xmlParser.h" -#include "XML_Parse.h" -#include "arch_const.h" + +#include +#include #include #include -#include -#include - using namespace std; -class selection_logic : public Component{ +class selection_logic : public Component { public: - selection_logic(bool _is_default, int win_entries_, - int issue_width_, const InputParameter *configure_interface, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface); - bool is_default; - InputParameter l_ip; - uca_org_t local_result; - const ParseXML *XML_interface; - int win_entries; - int issue_width; - int num_threads; - enum Device_ty device_ty; - enum Core_type core_ty; - - void selection_power(); - void leakage_feedback(double temperature); // TODO + selection_logic( + bool _is_default, int win_entries_, int issue_width_, + const InputParameter *configure_interface, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); //, const ParseXML *_XML_interface); + bool is_default; + InputParameter l_ip; + uca_org_t local_result; + const ParseXML *XML_interface; + int win_entries; + int issue_width; + int num_threads; + enum Device_ty device_ty; + enum Core_type core_ty; + + void selection_power(); + void leakage_feedback(double temperature); // TODO }; -class 
dep_resource_conflict_check : public Component{ +class dep_resource_conflict_check : public Component { public: - dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; - CoreDynParam coredynp; - int compare_bits; - bool is_default; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - - void conflict_check_power(); - double compare_cap(); - ~dep_resource_conflict_check(){ - local_result.cleanup(); - } - - void leakage_feedback(double temperature); + dep_resource_conflict_check(const InputParameter *configure_interface, + const CoreDynParam &dyn_p_, int compare_bits_, + bool _is_default = true); + InputParameter l_ip; + uca_org_t local_result; + double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; + CoreDynParam coredynp; + int compare_bits; + bool is_default; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + + void conflict_check_power(); + double compare_cap(); + ~dep_resource_conflict_check() { local_result.cleanup(); } + + void leakage_feedback(double temperature); }; -class inst_decoder: public Component{ +class inst_decoder : public Component { public: - inst_decoder(bool _is_default, const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder); - inst_decoder(); - bool is_default; - int opcode_length; - int num_decoders; - bool x86; - int num_decoder_segments; - int num_decoded_signals; - InputParameter l_ip; - uca_org_t local_result; - enum Device_ty device_ty; - enum Core_type core_ty; - - Decoder * final_dec; - Predec * pre_dec; - - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - void inst_decoder_delay_power(); - 
~inst_decoder(); - void leakage_feedback(double temperature); + inst_decoder(bool _is_default, const InputParameter *configure_interface, + int opcode_length_, int num_decoders_, bool x86_, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + inst_decoder(); + bool is_default; + int opcode_length; + int num_decoders; + bool x86; + int num_decoder_segments; + int num_decoded_signals; + InputParameter l_ip; + uca_org_t local_result; + enum Device_ty device_ty; + enum Core_type core_ty; + + Decoder *final_dec; + Predec *pre_dec; + + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + void inst_decoder_delay_power(); + ~inst_decoder(); + void leakage_feedback(double temperature); }; class DFFCell : public Component { public: - DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load, - const InputParameter *configure_interface); - InputParameter l_ip; - bool is_dram; - double cell_load; - double WdecNANDn; - double WdecNANDp; - double clock_cap; - int model; - int n_switch; - int n_keep_1; - int n_keep_0; - int n_clock; - powerDef e_switch; - powerDef e_keep_1; - powerDef e_keep_0; - powerDef e_clock; - - double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); - void compute_DFF_cell(void); - }; - -class Pipeline : public Component{ -public: - Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - CoreDynParam coredynp; - enum Device_ty device_ty; - bool is_core_pipeline, is_default; - double num_piperegs; -// int pipeline_stages; -// int tot_stage_vector, per_stage_vector; - bool process_ind; - double WNANDn ; - double WNANDp; - double load_per_pipeline_stage; -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, 
data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; - void compute_stage_vector(); - void compute(); - ~Pipeline(){ - local_result.cleanup(); - }; + DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, + double _cell_load, const InputParameter *configure_interface); + InputParameter l_ip; + bool is_dram; + double cell_load; + double WdecNANDn; + double WdecNANDp; + double clock_cap; + int model; + int n_switch; + int n_keep_1; + int n_keep_0; + int n_clock; + powerDef e_switch; + powerDef e_keep_1; + powerDef e_keep_0; + powerDef e_clock; + + double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); + void compute_DFF_cell(void); +}; +class Pipeline : public Component { +public: + Pipeline(const InputParameter *configure_interface, + const CoreDynParam &dyn_p_, enum Device_ty device_ty_ = Core_device, + bool _is_core_pipeline = true, bool _is_default = true); + InputParameter l_ip; + uca_org_t local_result; + CoreDynParam coredynp; + enum Device_ty device_ty; + bool is_core_pipeline, is_default; + double num_piperegs; + // int pipeline_stages; + // int tot_stage_vector, per_stage_vector; + bool process_ind; + double WNANDn; + double WNANDp; + double load_per_pipeline_stage; + // int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, + // commitWidth, instruction_length; int PC_width, opcode_length, + // num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; bool + // thread_clock_gated; bool in_order, multithreaded; + void compute_stage_vector(); + void compute(); + ~Pipeline() { local_result.cleanup(); }; }; -//class core_pipeline :public pipeline{ -//public: -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; -// core_pipeline(bool _is_default, const InputParameter 
*configure_interface); -// virtual void compute_stage_vector(); +// class core_pipeline :public pipeline{ +// public: +// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, +// commitWidth, instruction_length; int PC_width, opcode_length, +// num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; bool +// thread_clock_gated; bool in_order, multithreaded; core_pipeline(bool +//_is_default, const InputParameter *configure_interface); virtual void +// compute_stage_vector(); // //}; -class FunctionalUnit :public Component{ +class FunctionalUnit : public Component { public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double FU_height; - double clockRate,executionTime; - double num_fu; - double energy, base_energy,per_access_energy, leakage, gate_leakage; - bool is_default; - enum FU_type fu_type; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - - FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void leakage_feedback(double temperature); - + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double FU_height; + double clockRate, executionTime; + double num_fu; + double energy, base_energy, per_access_energy, leakage, gate_leakage; + bool is_default; + enum FU_type fu_type; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + + FunctionalUnit(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + enum FU_type fu_type); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void leakage_feedback(double temperature); }; -class UndiffCore :public Component{ +class UndiffCore : public 
Component { public: - UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false); - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Core_type core_ty; - bool opt_performance, embedded; - double pipeline_stage,num_hthreads,issue_width; - bool is_default; - - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~UndiffCore(){}; - bool exist; - - + UndiffCore(ParseXML *XML_interface, int ithCore_, + InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + bool exist_ = true, bool embedded_ = false); + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + enum Core_type core_ty; + bool opt_performance, embedded; + double pipeline_stage, num_hthreads, issue_width; + bool is_default; + + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~UndiffCore(){}; + bool exist; }; #endif /* LOGIC_H_ */ diff --git a/main.cc b/main.cc index c370e32..100f667 100644 --- a/main.cc +++ b/main.cc @@ -28,74 +28,69 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ -#include "io.h" -#include -#include "xmlParser.h" #include "XML_Parse.h" -#include "processor.h" #include "globalvar.h" +#include "io.h" +#include "processor.h" #include "version.h" +#include "xmlParser.h" +#include using namespace std; -void print_usage(char * argv0); - -int main(int argc,char *argv[]) -{ - char * fb ; - bool infile_specified = false; - int plevel = 2; - opt_for_clk =true; - //cout.precision(10); - if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help")) - { - print_usage(argv[0]); - } 
+void print_usage(char *argv0); - for (int32_t i = 0; i < argc; i++) - { - if (argv[i] == string("-infile")) - { - infile_specified = true; - i++; - fb = argv[ i]; - } +int main(int argc, char *argv[]) { + char *fb; + bool infile_specified = false; + int plevel = 2; + opt_for_clk = true; + // cout.precision(10); + if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help")) { + print_usage(argv[0]); + } - if (argv[i] == string("-print_level")) - { - i++; - plevel = atoi(argv[i]); - } + for (int32_t i = 0; i < argc; i++) { + if (argv[i] == string("-infile")) { + infile_specified = true; + i++; + fb = argv[i]; + } - if (argv[i] == string("-opt_for_clk")) - { - i++; - opt_for_clk = (bool)atoi(argv[i]); - } - } - if (infile_specified == false) - { - print_usage(argv[0]); - } + if (argv[i] == string("-print_level")) { + i++; + plevel = atoi(argv[i]); + } + if (argv[i] == string("-opt_for_clk")) { + i++; + opt_for_clk = (bool)atoi(argv[i]); + } + } + if (infile_specified == false) { + print_usage(argv[0]); + } - cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") is computing the target processor...\n "<parse(fb); - Processor proc(p1); - proc.displayEnergy(2, plevel); - delete p1; - return 0; + // parse XML-based interface + ParseXML *p1 = new ParseXML(); + p1->parse(fb); + Processor proc(p1); + proc.displayEnergy(2, plevel); + delete p1; + return 0; } -void print_usage(char * argv0) -{ - cerr << "How to use McPAT:" << endl; - cerr << " mcpat -infile -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl; - //cerr << " Note:default print level is at processor level, please increase it to see the details" << endl; - exit(1); +void print_usage(char *argv0) { + cerr << "How to use McPAT:" << endl; + cerr << " mcpat -infile -print_level < level of details " + "0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for " + "target clock rate)>" + << endl; 
+ // cerr << " Note:default print level is at processor level, please + // increase it to see the details" << endl; + exit(1); } diff --git a/memoryctrl.cc b/memoryctrl.cc index 69c47b3..b312474 100644 --- a/memoryctrl.cc +++ b/memoryctrl.cc @@ -28,752 +28,977 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ -#include "io.h" -#include "parameter.h" +#include "memoryctrl.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" #include "const.h" +#include "io.h" #include "logic.h" -#include "basic_circuit.h" -#include +#include "parameter.h" + #include -#include "XML_Parse.h" -#include -#include #include -#include "memoryctrl.h" -#include "basic_components.h" +#include +#include +#include /* overview of MC models: - * McPAT memory controllers are modeled according to large number of industrial data points. - * The Basic memory controller architecture is base on the Synopsis designs - * (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers) - * as in Cadence ChipEstimator Tool + * McPAT memory controllers are modeled according to large number of industrial + * data points. The Basic memory controller architecture is base on the Synopsis + * designs (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite + * protocol controllers) as in Cadence ChipEstimator Tool * - * An MC has 3 parts as shown in this design. McPAT models both high performance MC - * based on Niagara processor designs and curving and low power MC based on data points in - * Cadence ChipEstimator Tool. + * An MC has 3 parts as shown in this design. McPAT models both high performance + * MC based on Niagara processor designs and curving and low power MC based on + * data points in Cadence ChipEstimator Tool. 
* - * The frontend is modeled analytically, the backend is modeled empirically according to - * DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator Tool - * The PHY is modeled based on - * "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces ," ISSCC 2006, - * and A 14mW 6.25Gb/s Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 + * The frontend is modeled analytically, the backend is modeled empirically + * according to DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator + * Tool The PHY is modeled based on "A 100mW 9.6Gb/s Transceiver in 90nm CMOS + * for next-generation memory interfaces ," ISSCC 2006, and A 14mW 6.25Gb/s + * Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 * - * In Cadence ChipEstimator Tool there are two types of memory controllers: the full memory controllers - * that includes the frontend as the DesignWare DDR2/DDR3-Lite memory controllers and the backend only - * memory controllers as the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite memory - * controllers, all memory controller IP in Cadence ChipEstimator Tool are backend memory controllers such as - * DDRC 1600A and DDRC 800A). Thus,to some extend the area and power difference between DesignWare - * DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite protocol controllers can be an estimation to the - * frontend power and area, which is very close the analitically modeled results of the frontend for Niagara2@65nm + * In Cadence ChipEstimator Tool there are two types of memory controllers: the + * full memory controllers that includes the frontend as the DesignWare + * DDR2/DDR3-Lite memory controllers and the backend only memory controllers as + * the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite + * memory controllers, all memory controller IP in Cadence ChipEstimator Tool + * are backend memory controllers such as DDRC 1600A and DDRC 800A). 
Thus,to + * some extend the area and power difference between DesignWare DDR2/DDR3-Lite + * memory controllers and DDR2/DDR3-Lite protocol controllers can be an + * estimation to the frontend power and area, which is very close the + * analitically modeled results of the frontend for Niagara2@65nm * */ -MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:l_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_) -{ +MCBackend::MCBackend(InputParameter *interface_ip_, const MCParam &mcp_, + enum MemoryCtrl_type mc_type_) + : l_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_) { local_result = init_interface(&l_ip); compute(); - } - -void MCBackend::compute() -{ - //double max_row_addr_width = 20.0;//Current address 12~18bits - double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend, +void MCBackend::compute() { + // double max_row_addr_width = 20.0;//Current address 12~18bits + double C_MCB, mc_power, backend_dyn, + backend_gates; //, refresh_period,refresh_freq;//Equivalent per bit Cap + // for backend, double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); double NMOS_sizing, PMOS_sizing; - if (mc_type == MC) - { - if (mcp.type == 0) - { - //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); - area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2 - //assuming the approximately same scaling factor as seen in processors. - //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip. - //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. 
- //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et - mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; - power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - - } - else - { NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2 - backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate - //Scaling to technology and DIMM feature. 
The base IP support DDR3-1600(PC3 12800) - backend_gates = 50000*mcp.dataBusWidth/64.0;//50000 is from Cadence ChipEstimator - - power_t.readOp.dynamic = backend_dyn; - power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - - } - } - else - {//skip old model - cout<<"Unknown memory controllers"<sys.physical_address_width + mcp.opcodeW)/8.0)); - interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; + // memory request reorder buffer + tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; + data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW) / 8.0)); + interface_ip.cache_sz = data * XML->sys.mc.req_window_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - 
interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //selection and arbitration logic - interface_ip.assoc = 1; //reset to prevent unnecessary warning messages when init_interface - MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device); - - //read buffers. - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; + frontendBuffer = + new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); + frontendBuffer->area.set_area(frontendBuffer->area.get_area() + + frontendBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + frontendBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + + // selection and arbitration logic + interface_ip.assoc = + 
1; // reset to prevent unnecessary warning messages when init_interface + MC_arb = + new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel, + 1, &interface_ip, Uncore_device); + + // read buffers. + data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation + // //8 means converting bit to Byte + interface_ip.cache_sz = + data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; + interface_ip.line_sz = data; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 0; // XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; + interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //write buffer - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - 
interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; + readBuffer->area.set_area(readBuffer->area.get_area() + + readBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + readBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + + // write buffer + data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation + // //8 means converting bit to Byte + interface_ip.cache_sz = + data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; + interface_ip.line_sz = data; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); + 
writeBuffer->area.set_area(writeBuffer->area.get_area() + + writeBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + writeBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); } -void MCFrontEnd::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; - - } - else - { - //init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - //For each channel, each memory word need to check the address data to achieve best scheduling results. 
- //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; - } - - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - -// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access* -// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ -// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic - + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic - + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access* - readBuffer->local_result.power.readOp.dynamic+ - readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access* - writeBuffer->local_result.power.readOp.dynamic+ - 
writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) - { - power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; +void MCFrontEnd::computeEnergy(bool is_tdp) { + if (is_tdp) { + // init stats for Peak + frontendBuffer->stats_t.readAc.access = + frontendBuffer->l_ip.num_search_ports; + frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->tdp_stats = frontendBuffer->stats_t; + + readBuffer->stats_t.readAc.access = + readBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; + readBuffer->stats_t.writeAc.access = + readBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; + readBuffer->tdp_stats = readBuffer->stats_t; + + writeBuffer->stats_t.readAc.access = + writeBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; + writeBuffer->stats_t.writeAc.access = + writeBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; + writeBuffer->tdp_stats = writeBuffer->stats_t; + + } else { + // init stats for runtime power (RTP) + frontendBuffer->stats_t.readAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * + mcp.dataBusWidth / 72; + // For each channel, each memory word need to check the address data to + // achieve best scheduling results. 
and this need to be done on all physical + // DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 + frontendBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * + mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats = frontendBuffer->stats_t; + + readBuffer->stats_t.readAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer->rtp_stats = readBuffer->stats_t; + + writeBuffer->stats_t.readAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer->rtp_stats = writeBuffer->stats_t; + } - } - else - { - rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; - } + frontendBuffer->power_t.reset(); + readBuffer->power_t.reset(); + writeBuffer->power_t.reset(); + + // frontendBuffer->power_t.readOp.dynamic += + //(frontendBuffer->stats_t.readAc.access* + // (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ + // frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); + + frontendBuffer->power_t.readOp.dynamic += + (frontendBuffer->stats_t.readAc.access + + frontendBuffer->stats_t.writeAc.access) * + frontendBuffer->local_result.power.searchOp.dynamic + + frontendBuffer->stats_t.readAc.access * + frontendBuffer->local_result.power.readOp.dynamic + + frontendBuffer->stats_t.writeAc.access * + 
frontendBuffer->local_result.power.writeOp.dynamic; + + readBuffer->power_t.readOp.dynamic += + (readBuffer->stats_t.readAc.access * + readBuffer->local_result.power.readOp.dynamic + + readBuffer->stats_t.writeAc.access * + readBuffer->local_result.power.writeOp.dynamic); + writeBuffer->power_t.readOp.dynamic += + (writeBuffer->stats_t.readAc.access * + writeBuffer->local_result.power.readOp.dynamic + + writeBuffer->stats_t.writeAc.access * + writeBuffer->local_result.power.writeOp.dynamic); + + if (is_tdp) { + power = power + frontendBuffer->power_t + readBuffer->power_t + + writeBuffer->power_t + + (frontendBuffer->local_result.power + + readBuffer->local_result.power + writeBuffer->local_result.power) * + pppm_lkg; + + } else { + rt_power = + rt_power + frontendBuffer->power_t + readBuffer->power_t + + writeBuffer->power_t + + (frontendBuffer->local_result.power + readBuffer->local_result.power + + writeBuffer->local_result.power) * + pppm_lkg; + rt_power.readOp.dynamic = + rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * + mcp.num_mcs * mcp.executionTime; + } } -void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
frontendBuffer->power.readOp.power_gated_with_long_channel_leakage : frontendBuffer->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Front End ROB:" << endl; + cout << indent_str_next + << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << frontendBuffer->power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? frontendBuffer->power.readOp + .power_gated_with_long_channel_leakage + : frontendBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << frontendBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + + cout << endl; + cout << indent_str << "Read Buffer:" << endl; + cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << readBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
readBuffer->power.readOp + .power_gated_with_long_channel_leakage + : readBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << readBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "Write Buffer:" << endl; + cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << writeBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? writeBuffer->power.readOp + .power_gated_with_long_channel_leakage + : writeBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << writeBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + } else { + cout << indent_str << "Front End ROB:" << endl; + cout << indent_str_next + << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << frontendBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + cout << endl; + cout << indent_str << "Read Buffer:" << endl; + cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << 
"Peak Dynamic = " + << readBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + cout << endl; + cout << indent_str << "Write Buffer:" << endl; + cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << writeBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + } } - -MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - interface_ip(*interface_ip_), - mc_type(mc_type_), - frontend(0), - transecEngine(0), - PHY(0), - pipeLogic(0) -{ +MemoryController::MemoryController(ParseXML *XML_interface, + InputParameter *interface_ip_, + enum MemoryCtrl_type mc_type_) + : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), + frontend(0), transecEngine(0), PHY(0), pipeLogic(0) { /* All computations are for a single MC * */ interface_ip.wire_is_mat_type = 2; interface_ip.wire_os_mat_type = 2; - interface_ip.wt =Global; + interface_ip.wt = Global; set_mc_param(); frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ frontend->area.get_area()); + area.set_area(area.get_area() + frontend->area.get_area()); transecEngine = new MCBackend(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ transecEngine->area.get_area()); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY = new MCPHY(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ 
PHY->area.get_area()); + area.set_area(area.get_area() + transecEngine->area.get_area()); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY = new MCPHY(&interface_ip, mcp, mc_type); + area.set_area(area.get_area() + PHY->area.get_area()); } - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// transecEngine.initialize(&interface_ip); -// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; -// transecEngine.memDataWidth = dataBusWidth; -// transecEngine.memRank = XML->sys.mem.number_ranks; -// //transecEngine.memAccesses=XML->sys.mc.memory_accesses; -// //transecEngine.llcBlocksize=llcBlockSize; -// transecEngine.compute(); -// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ; -// area.set_area(area.get_area()+ transecEngine.area.get_area()); -// ///cout<<"area="<sys.mem.peak_transfer_rate; -// PHY.memDataWidth = dataBusWidth; -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// PHY.compute(); -// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ; -// area.set_area(area.get_area()+ PHY.area.get_area()); - ///cout<<"area="<sys.core[0].opcode_width + dataBusWidth; -// pipeLogic = new pipeline(is_default, &interface_ip); -// //pipeLogic.init_pipeline(is_default, &interface_ip); -// pipeLogic->compute_pipeline(); -// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); -// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead -// -// -//// //clock -//// clockNetwork.init_wire_external(is_default, &interface_ip); -//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb -//// clockNetwork.end_wiring_level =5;//toplevel metal -//// clockNetwork.start_wiring_level =5;//toplevel metal -//// clockNetwork.num_regs = pipeLogic.tot_stage_vector; -//// clockNetwork.optimize_wire(); - - -} -void MemoryController::computeEnergy(bool is_tdp) -{ - - frontend->computeEnergy(is_tdp); - transecEngine->computeEnergy(is_tdp); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY->computeEnergy(is_tdp); - } - if (is_tdp) - { - power = power + frontend->power + transecEngine->power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - power = power + PHY->power; - } - } - else - { - rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - rt_power = rt_power + PHY->rt_power; - } - } + //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, + // Run the RTL code from OpenSparc. + // transecEngine.initialize(&interface_ip); + // transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; + // transecEngine.memDataWidth = dataBusWidth; + // transecEngine.memRank = XML->sys.mem.number_ranks; + // //transecEngine.memAccesses=XML->sys.mc.memory_accesses; + // //transecEngine.llcBlocksize=llcBlockSize; + // transecEngine.compute(); + // transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) + // ; area.set_area(area.get_area()+ transecEngine.area.get_area()); + // ///cout<<"area="<sys.mem.peak_transfer_rate; + // PHY.memDataWidth = dataBusWidth; + // //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power + // //PHY.llcBlocksize=llcBlockSize; + // PHY.compute(); + // PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) + // ; area.set_area(area.get_area()+ PHY.area.get_area()); + /// cout<<"area="<sys.core[0].opcode_width + dataBusWidth; pipeLogic = new + // pipeline(is_default, &interface_ip); + // //pipeLogic.init_pipeline(is_default, &interface_ip); + // 
pipeLogic->compute_pipeline(); + // area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); + // area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing + // overhead + // + // + //// //clock + //// clockNetwork.init_wire_external(is_default, &interface_ip); + //// clockNetwork.clk_area =area*1.1;//10% of placement overhead. + /// rule of thumb / clockNetwork.end_wiring_level =5;//toplevel metal / + /// clockNetwork.start_wiring_level =5;//toplevel metal / + /// clockNetwork.num_regs = pipeLogic.tot_stage_vector; / + /// clockNetwork.optimize_wire(); } +void MemoryController::computeEnergy(bool is_tdp) { -void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << "Memory Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? 
power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout<2){ - frontend->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Transaction Engine:" << endl; - cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? transecEngine->power.readOp.power_gated_with_long_channel_leakage : transecEngine->power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <computeEnergy(is_tdp); + transecEngine->computeEnergy(is_tdp); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY->computeEnergy(is_tdp); + } + if (is_tdp) { + power = power + frontend->power + transecEngine->power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + power = power + PHY->power; + } + } else { + rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + rt_power = rt_power + PHY->rt_power; + } + } +} +void MemoryController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = 
XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << "Memory Controller:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic / mcp.executionTime + << " W" << endl; + cout << endl; + cout << indent_str << "Front End Engine:" << endl; + cout << indent_str_next << "Area = " << frontend->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << frontend->power.readOp.dynamic * mcp.clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? frontend->power.readOp.longer_channel_leakage + : frontend->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
frontend->power.readOp.power_gated_with_long_channel_leakage + : frontend->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << frontend->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + if (plevel > 2) { + frontend->displayEnergy(indent + 4, is_tdp); + } + cout << indent_str << "Transaction Engine:" << endl; + cout << indent_str_next + << "Area = " << transecEngine->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << transecEngine->power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? transecEngine->power.readOp.longer_channel_leakage + : transecEngine->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? transecEngine->power.readOp + .power_gated_with_long_channel_leakage + : transecEngine->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << transecEngine->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + cout << indent_str << "PHY:" << endl; + cout << indent_str_next << "Area = " << PHY->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << PHY->power.readOp.dynamic * mcp.clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
PHY->power.readOp.longer_channel_leakage + : PHY->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? PHY->power.readOp.power_gated_with_long_channel_leakage + : PHY->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << PHY->rt_power.readOp.dynamic / mcp.executionTime << " W" << endl; + cout << endl; + } + } else { + cout << "Memory Controller:" << endl; + cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << endl; + cout << endl; + } } -void MemoryController::set_mc_param() -{ - - if (mc_type==MC) - { - mcp.clockRate =XML->sys.mc.mc_clock*2;//DDR double pumped - mcp.clockRate *= 1e6; - mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead - mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width; - mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width; - mcp.opcodeW =16; - mcp.num_mcs = XML->sys.mc.number_mcs; - mcp.num_channels = XML->sys.mc.memory_channels_per_mc; - mcp.reads = XML->sys.mc.memory_reads; - mcp.writes = XML->sys.mc.memory_writes; - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
- mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; - mcp.memRank = XML->sys.mc.number_ranks; - //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers - //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - //PHY.llcBlocksize=llcBlockSize; - mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared - mcp.LVDS = XML->sys.mc.LVDS; - mcp.type = XML->sys.mc.type; - mcp.withPHY = XML->sys.mc.withPHY; - - if ( XML->sys.mc.vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.mc.vdd; - interface_ip.lop_Vdd = XML->sys.mc.vdd; - interface_ip.lstp_Vdd = XML->sys.mc.vdd; - } - if ( XML->sys.mc.power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.mc.power_gating_vcc; - - } - } -// else if (mc_type==FLASHC) -// { -// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped -// mcp.clockRate *= 1e6; -// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -// -// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead -// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width; -// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; -// mcp.opcodeW =16; -// mcp.num_mcs = XML->sys.flashc.number_mcs; -// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; -// mcp.reads = XML->sys.flashc.memory_reads; -// mcp.writes = XML->sys.flashc.memory_writes; -// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
-// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; -// mcp.memRank = XML->sys.flashc.number_ranks; -// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared -// mcp.LVDS = XML->sys.flashc.LVDS; -// mcp.type = XML->sys.flashc.type; -// } - else - { - cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <sys.mc.mc_clock * 2; // DDR double pumped + mcp.clockRate *= 1e6; + mcp.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + + mcp.llcBlockSize = int(ceil(XML->sys.mc.llc_line_length / 8.0)) + + XML->sys.mc.llc_line_length; // ecc overhead + mcp.dataBusWidth = + int(ceil(XML->sys.mc.databus_width / 8.0)) + XML->sys.mc.databus_width; + mcp.addressBusWidth = int( + ceil(XML->sys.mc.addressbus_width)); // XML->sys.physical_address_width; + mcp.opcodeW = 16; + mcp.num_mcs = XML->sys.mc.number_mcs; + mcp.num_channels = XML->sys.mc.memory_channels_per_mc; + mcp.reads = XML->sys.mc.memory_reads; + mcp.writes = XML->sys.mc.memory_writes; + //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better + // numbers, Run the RTL code from OpenSparc. 
+ mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; + mcp.memRank = XML->sys.mc.number_ranks; + //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers + // PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power + // PHY.llcBlocksize=llcBlockSize; + mcp.frontend_duty_cycle = 0.5; // for max power, the actual off-chip links + // is bidirectional but time shared + mcp.LVDS = XML->sys.mc.LVDS; + mcp.type = XML->sys.mc.type; + mcp.withPHY = XML->sys.mc.withPHY; + + if (XML->sys.mc.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.mc.vdd; + interface_ip.lop_Vdd = XML->sys.mc.vdd; + interface_ip.lstp_Vdd = XML->sys.mc.vdd; + } + if (XML->sys.mc.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.mc.power_gating_vcc; + } + } + // else if (mc_type==FLASHC) + // { + // mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double + // pumped mcp.clockRate *= 1e6; mcp.executionTime + // = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); + // + // mcp.llcBlockSize + //=int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc + // overhead mcp.dataBusWidth + // =int(ceil(XML->sys.flashc.databus_width/8.0)) + + // XML->sys.flashc.databus_width; mcp.addressBusWidth + //=int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; + // mcp.opcodeW =16; + // mcp.num_mcs = XML->sys.flashc.number_mcs; + // mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; + // mcp.reads = XML->sys.flashc.memory_reads; + // mcp.writes = XML->sys.flashc.memory_writes; + // //+++++++++Transaction engine +++++++++++++++++ ////TODO needs + // better numbers, Run the RTL code from OpenSparc. 
+ // mcp.peakDataTransferRate = + // XML->sys.flashc.peak_transfer_rate; mcp.memRank = + // XML->sys.flashc.number_ranks; + // //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs + // better numbers + // //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max + // power + // //PHY.llcBlocksize=llcBlockSize; + // mcp.frontend_duty_cycle = 0.5;//for max power, the actual + // off-chip links is bidirectional but time shared mcp.LVDS = + // XML->sys.flashc.LVDS; mcp.type = XML->sys.flashc.type; + // } + else { + cout << "Unknown memory controller type: neither DRAM controller nor Flash " + "controller" + << endl; + exit(0); + } } -MCFrontEnd ::~MCFrontEnd(){ +MCFrontEnd ::~MCFrontEnd() { - if(MC_arb) {delete MC_arb; MC_arb = 0;} - if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;} - if(readBuffer) {delete readBuffer; readBuffer = 0;} - if(writeBuffer) {delete writeBuffer; writeBuffer = 0;} + if (MC_arb) { + delete MC_arb; + MC_arb = 0; + } + if (frontendBuffer) { + delete frontendBuffer; + frontendBuffer = 0; + } + if (readBuffer) { + delete readBuffer; + readBuffer = 0; + } + if (writeBuffer) { + delete writeBuffer; + writeBuffer = 0; + } } -MemoryController ::~MemoryController(){ +MemoryController ::~MemoryController() { - if(frontend) {delete frontend; frontend = 0;} - if(transecEngine) {delete transecEngine; transecEngine = 0;} - if(PHY) {delete PHY; PHY = 0;} - if(pipeLogic) {delete pipeLogic; pipeLogic = 0;} + if (frontend) { + delete frontend; + frontend = 0; + } + if (transecEngine) { + delete transecEngine; + transecEngine = 0; + } + if (PHY) { + delete PHY; + PHY = 0; + } + if (pipeLogic) { + delete pipeLogic; + pipeLogic = 0; + } } - diff --git a/memoryctrl.h b/memoryctrl.h index 47a56e6..1c5507b 100644 --- a/memoryctrl.h +++ b/memoryctrl.h @@ -33,80 +33,86 @@ #define MEMORYCTRL_H_ #include "XML_Parse.h" +#include "logic.h" #include "parameter.h" //#include "io.h" #include "array.h" //#include "Undifferentiated_Core_Area.h" 
-#include #include "basic_components.h" +#include + class MCBackend : public Component { - public: - InputParameter l_ip; - uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCBackend(){}; +public: + InputParameter l_ip; + uca_org_t local_result; + enum MemoryCtrl_type mc_type; + MCParam mcp; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + MCBackend(InputParameter *interface_ip_, const MCParam &mcp_, + enum MemoryCtrl_type mc_type_); + void compute(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MCBackend(){}; }; class MCPHY : public Component { - public: - InputParameter l_ip; - uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCPHY(){}; +public: + InputParameter l_ip; + uca_org_t local_result; + enum MemoryCtrl_type mc_type; + MCParam mcp; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + MCPHY(InputParameter *interface_ip_, const MCParam &mcp_, + enum MemoryCtrl_type mc_type_); + void compute(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MCPHY(){}; }; class MCFrontEnd : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum 
MemoryCtrl_type mc_type; - MCParam mcp; - selection_logic * MC_arb; - ArrayST * frontendBuffer; - ArrayST * readBuffer; - ArrayST * writeBuffer; +public: + ParseXML *XML; + InputParameter interface_ip; + enum MemoryCtrl_type mc_type; + MCParam mcp; + selection_logic *MC_arb; + ArrayST *frontendBuffer; + ArrayST *readBuffer; + ArrayST *writeBuffer; - MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCFrontEnd(); + MCFrontEnd(ParseXML *XML_interface, InputParameter *interface_ip_, + const MCParam &mcp_, enum MemoryCtrl_type mc_type_); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MCFrontEnd(); }; class MemoryController : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - MCFrontEnd * frontend; - MCBackend * transecEngine; - MCPHY * PHY; - Pipeline * pipeLogic; +public: + ParseXML *XML; + InputParameter interface_ip; + enum MemoryCtrl_type mc_type; + MCParam mcp; + MCFrontEnd *frontend; + MCBackend *transecEngine; + MCPHY *PHY; + Pipeline *pipeLogic; - //clock_network clockNetwork; - MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_); - void set_mc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MemoryController(); + // clock_network clockNetwork; + MemoryController(ParseXML *XML_interface, InputParameter *interface_ip_, + enum MemoryCtrl_type mc_type_); + void set_mc_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MemoryController(); }; #endif /* MEMORYCTRL_H_ */ diff --git a/noc.cc b/noc.cc index 
d1ab6c9..4fed6ad 100644 --- a/noc.cc +++ b/noc.cc @@ -29,376 +29,493 @@ * ***************************************************************************/ +#include "noc.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" #include "io.h" #include "parameter.h" -#include "const.h" -#include "basic_circuit.h" -#include + #include -#include "XML_Parse.h" -#include -#include #include -#include "noc.h" - - +#include +#include +#include -NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_) -:XML(XML_interface), -ithNoC(ithNoC_), -interface_ip(*interface_ip_), -router(0), -link_bus(0), -link_bus_exist(false), -router_exist(false), -M_traffic_pattern(M_traffic_pattern_) -{ - /* - * initialize, compute and optimize individual components. - */ - - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_noc_param(); - local_result=init_interface(&interface_ip); - scktRatio = g_tp.sckt_co_eff; - - if (nocdynp.type) - {/* - * if NOC compute router, router links must be computed separately - * and called from external - * since total chip area must be known first - */ - init_router(); - } - else - { - init_link_bus(link_len_); //if bus compute bus - } - - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb - // clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); +NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, + double M_traffic_pattern_, double link_len_) + : XML(XML_interface), ithNoC(ithNoC_), interface_ip(*interface_ip_), + router(0), link_bus(0), link_bus_exist(false), router_exist(false), + M_traffic_pattern(M_traffic_pattern_) { + /* + * initialize, compute and optimize individual components. + */ + + if (XML->sys.Embedded) { + interface_ip.wt = Global_30; + interface_ip.wire_is_mat_type = 0; + interface_ip.wire_os_mat_type = 1; + } else { + interface_ip.wt = Global; + interface_ip.wire_is_mat_type = 2; + interface_ip.wire_os_mat_type = 2; + } + set_noc_param(); + local_result = init_interface(&interface_ip); + scktRatio = g_tp.sckt_co_eff; + + if (nocdynp.type) { /* + * if NOC compute router, router links must be computed + * separately and called from external since total chip + * area must be known first + */ + init_router(); + } else { + init_link_bus(link_len_); // if bus compute bus + } + + // //clock power + // clockNetwork.init_wire_external(is_default, &interface_ip); + // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
+ // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal + // clockNetwork.start_wiring_level =5;//toplevel metal + // clockNetwork.num_regs = corepipe.tot_stage_vector; + // clockNetwork.optimize_wire(); } -void NoC::init_router() -{ - router = new Router(nocdynp.flit_size, - nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc, - nocdynp.virtual_channel_per_port, &(g_tp.peri_global), - nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern); - //router->print_router(); - area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes); - - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - double pg_reduction_retain = power_gating_leakage_reduction(false);//state retaining array structure; - double pg_reduction_nonretain = power_gating_leakage_reduction(false);//non-state-retaining array structure; - - router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction; - router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction; - router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction; - router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction; - - router->buffer.power.readOp.power_gated_leakage = router->buffer.power.readOp.leakage * pg_reduction_retain;//TODO: this is a simplified version; should use the power_gated_leakage generated in buff - router->crossbar.power.readOp.power_gated_leakage = router->crossbar.power.readOp.leakage * pg_reduction_nonretain; - router->arbiter.power.readOp.power_gated_leakage = router->arbiter.power.readOp.leakage * pg_reduction_nonretain; - router->power.readOp.power_gated_leakage = router->buffer.power.readOp.power_gated_leakage - + router->crossbar.power.readOp.power_gated_leakage - + 
router->arbiter.power.readOp.power_gated_leakage; - - router->buffer.power.readOp.power_gated_with_long_channel_leakage = router->buffer.power.readOp.power_gated_leakage * long_channel_device_reduction;//TODO: this is a simplified version; should use the power_gated_leakage generated in buff - router->crossbar.power.readOp.power_gated_with_long_channel_leakage = router->crossbar.power.readOp.power_gated_leakage * long_channel_device_reduction; - router->arbiter.power.readOp.power_gated_with_long_channel_leakage = router->arbiter.power.readOp.power_gated_leakage * long_channel_device_reduction; - router->power.readOp.power_gated_with_long_channel_leakage = router->buffer.power.readOp.power_gated_with_long_channel_leakage - + router->crossbar.power.readOp.power_gated_with_long_channel_leakage - + router->arbiter.power.readOp.power_gated_with_long_channel_leakage; - - router_exist = true; +void NoC::init_router() { + router = new Router( + nocdynp.flit_size, + nocdynp.virtual_channel_per_port * nocdynp.input_buffer_entries_per_vc, + nocdynp.virtual_channel_per_port, &(g_tp.peri_global), + nocdynp.input_ports, nocdynp.output_ports, M_traffic_pattern); + // router->print_router(); + area.set_area(area.get_area() + + router->area.get_area() * nocdynp.total_nodes); + + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + double pg_reduction_retain = + power_gating_leakage_reduction(false); // state retaining array structure; + double pg_reduction_nonretain = power_gating_leakage_reduction( + false); // non-state-retaining array structure; + + router->power.readOp.longer_channel_leakage = + router->power.readOp.leakage * long_channel_device_reduction; + router->buffer.power.readOp.longer_channel_leakage = + router->buffer.power.readOp.leakage * long_channel_device_reduction; + router->crossbar.power.readOp.longer_channel_leakage = + router->crossbar.power.readOp.leakage * long_channel_device_reduction; + 
router->arbiter.power.readOp.longer_channel_leakage = + router->arbiter.power.readOp.leakage * long_channel_device_reduction; + + router->buffer.power.readOp.power_gated_leakage = + router->buffer.power.readOp.leakage * + pg_reduction_retain; // TODO: this is a simplified version; should use the + // power_gated_leakage generated in buff + router->crossbar.power.readOp.power_gated_leakage = + router->crossbar.power.readOp.leakage * pg_reduction_nonretain; + router->arbiter.power.readOp.power_gated_leakage = + router->arbiter.power.readOp.leakage * pg_reduction_nonretain; + router->power.readOp.power_gated_leakage = + router->buffer.power.readOp.power_gated_leakage + + router->crossbar.power.readOp.power_gated_leakage + + router->arbiter.power.readOp.power_gated_leakage; + + router->buffer.power.readOp.power_gated_with_long_channel_leakage = + router->buffer.power.readOp.power_gated_leakage * + long_channel_device_reduction; // TODO: this is a simplified version; + // should use the power_gated_leakage + // generated in buff + router->crossbar.power.readOp.power_gated_with_long_channel_leakage = + router->crossbar.power.readOp.power_gated_leakage * + long_channel_device_reduction; + router->arbiter.power.readOp.power_gated_with_long_channel_leakage = + router->arbiter.power.readOp.power_gated_leakage * + long_channel_device_reduction; + router->power.readOp.power_gated_with_long_channel_leakage = + router->buffer.power.readOp.power_gated_with_long_channel_leakage + + router->crossbar.power.readOp.power_gated_with_long_channel_leakage + + router->arbiter.power.readOp.power_gated_with_long_channel_leakage; + + router_exist = true; } -void NoC ::init_link_bus(double link_len_) -{ - +void NoC ::init_link_bus(double link_len_) { -// if (nocdynp.min_ports==1 ) - if (nocdynp.type) - link_name = "Links"; - else - link_name = "Bus"; + // if (nocdynp.min_ports==1 ) + if (nocdynp.type) + link_name = "Links"; + else + link_name = "Bus"; - link_len=link_len_; - 
assert(link_len>0); + link_len = link_len_; + assert(link_len > 0); - interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate; - interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate; + interface_ip.throughput = nocdynp.link_throughput / nocdynp.clockRate; + interface_ip.latency = nocdynp.link_latency / nocdynp.clockRate; - link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2; + link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes) / 2; - if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors - link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size, - link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc); + if (nocdynp.total_nodes > 1) + link_len /= 2; // All links are shared by neighbors + link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size, + link_len, &interface_ip, 3, true /*pipelinable*/, + nocdynp.route_over_perc); - link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area() - * nocdynp.global_linked_ports); + link_bus_tot_per_Router.area.set_area( + link_bus_tot_per_Router.area.get_area() + + link_bus->area.get_area() * nocdynp.global_linked_ports); - area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes); - link_bus_exist = true; + area.set_area(area.get_area() + + link_bus_tot_per_Router.area.get_area() * nocdynp.total_nodes); + link_bus_exist = true; } -void NoC::computeEnergy(bool is_tdp) -{ - //power_point_product_masks - double pppm_t[4] = {1,1,1,1}; - double M=nocdynp.duty_cycle; - if (is_tdp) - { - //init stats for TDP - stats_t.readAc.access = M; - tdp_stats = stats_t; - if (router_exist) - { - set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern - router->power = router->power*pppm_t; - set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes); - power = power + router->power*pppm_t; - } - if 
(link_bus_exist) - { - if (nocdynp.type) - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports); - //reset traffic pattern; local port do not have router links - else - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern - - link_bus_tot_per_Router.power = link_bus->power*pppm_t; - - set_pppm(pppm_t, nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); - power = power + link_bus_tot_per_Router.power*pppm_t; - - } - } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses; - rtp_stats = stats_t; - set_pppm(pppm_t, 1, 0 , 0, 0); - if (router_exist) - { - router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ; - router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ; - router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ; - - router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t + - router->power*pppm_lkg;//TDP power must be calculated first! 
- rt_power = rt_power + router->rt_power; - } - if (link_bus_exist) - { - set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access); - link_bus->rt_power = link_bus->power * pppm_t; - rt_power = rt_power + link_bus->rt_power; - } - - } +void NoC::computeEnergy(bool is_tdp) { + // power_point_product_masks + double pppm_t[4] = {1, 1, 1, 1}; + double M = nocdynp.duty_cycle; + if (is_tdp) { + // init stats for TDP + stats_t.readAc.access = M; + tdp_stats = stats_t; + if (router_exist) { + set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern + router->power = router->power * pppm_t; + set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, + nocdynp.total_nodes, nocdynp.total_nodes); + power = power + router->power * pppm_t; + } + if (link_bus_exist) { + if (nocdynp.type) + set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), + nocdynp.global_linked_ports, nocdynp.global_linked_ports, + nocdynp.global_linked_ports); + // reset traffic pattern; local port do not have router links + else + set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports), + nocdynp.global_linked_ports, nocdynp.global_linked_ports, + nocdynp.global_linked_ports); // reset traffic pattern + + link_bus_tot_per_Router.power = link_bus->power * pppm_t; + + set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, + nocdynp.total_nodes, nocdynp.total_nodes); + power = power + link_bus_tot_per_Router.power * pppm_t; + } + } else { + // init stats for runtime power (RTP) + stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses; + rtp_stats = stats_t; + set_pppm(pppm_t, 1, 0, 0, 0); + if (router_exist) { + router->buffer.rt_power.readOp.dynamic = + (router->buffer.power.readOp.dynamic + + router->buffer.power.writeOp.dynamic) * + rtp_stats.readAc.access; + router->crossbar.rt_power.readOp.dynamic = + router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access; + router->arbiter.rt_power.readOp.dynamic = + router->arbiter.power.readOp.dynamic * 
rtp_stats.readAc.access; + + router->rt_power = + router->rt_power + + (router->buffer.rt_power + router->crossbar.rt_power + + router->arbiter.rt_power) * + pppm_t + + router->power * pppm_lkg; // TDP power must be calculated first! + rt_power = rt_power + router->rt_power; + } + if (link_bus_exist) { + set_pppm(pppm_t, rtp_stats.readAc.access, 1, 1, rtp_stats.readAc.access); + link_bus->rt_power = link_bus->power * pppm_t; + rt_power = rt_power + link_bus->rt_power; + } + } } - -void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - double M =M_traffic_pattern*nocdynp.duty_cycle; - /*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc; - * When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to - * be applied together with McPAT's extra traffic pattern. - * */ - if (is_tdp) - { - cout << name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (power.readOp.power_gated_leakage * (long_channel? 
power.readOp.longer_channel_leakage/power.readOp.leakage:1) ) << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout<2){ - cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl; - cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl; - cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic) - *nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl; - cout << indent_str<< indent_str_next << "Subthreshold Leakage = " - << (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl; - if (power_gating) cout << indent_str<< indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? router->buffer.power.readOp.power_gated_with_long_channel_leakage : router->buffer.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl; - cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + double M = M_traffic_pattern * nocdynp.duty_cycle; + /*only router as a whole has been applied the M_traffic_pattern(0.6 by + * default) factor in router.cc; When power of crossbars, arbiters, etc need + * to be displayed, the M_traffic_pattern factor need to be applied together + * with McPAT's extra traffic pattern. 
+ * */ + if (is_tdp) { + cout << name << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * nocdynp.clockRate + << " W" << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (power.readOp.power_gated_leakage * + (long_channel + ? power.readOp.longer_channel_leakage / power.readOp.leakage + : 1)) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str << "Runtime Dynamic = " + << rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; + cout << endl; + + if (router_exist) { + cout << indent_str << "Router: " << endl; + cout << indent_str_next << "Area = " << router->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << router->power.readOp.dynamic * nocdynp.clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? router->power.readOp.longer_channel_leakage + : router->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
router->power.readOp.power_gated_with_long_channel_leakage + : router->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << router->rt_power.readOp.dynamic / nocdynp.executionTime << " W" + << endl; + cout << endl; + if (plevel > 2) { + cout << indent_str << indent_str << "Virtual Channel Buffer:" << endl; + cout << indent_str << indent_str_next << "Area = " + << router->buffer.area.get_area() * 1e-6 * nocdynp.input_ports + << " mm^2" << endl; + cout << indent_str << indent_str_next << "Peak Dynamic = " + << (router->buffer.power.readOp.dynamic + + router->buffer.power.writeOp.dynamic) * + nocdynp.min_ports * M * nocdynp.clockRate + << " W" << endl; + cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? router->buffer.power.readOp.longer_channel_leakage * + nocdynp.input_ports + : router->buffer.power.readOp.leakage * + nocdynp.input_ports) + << " W" << endl; + if (power_gating) + cout << indent_str << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
router->buffer.power.readOp + .power_gated_with_long_channel_leakage + : router->buffer.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << indent_str_next << "Gate Leakage = " + << router->buffer.power.readOp.gate_leakage * nocdynp.input_ports + << " W" << endl; + cout << indent_str << indent_str_next << "Runtime Dynamic = " + << router->buffer.rt_power.readOp.dynamic / nocdynp.executionTime + << " W" << endl; + cout << endl; + cout << indent_str << indent_str << "Crossbar:" << endl; + cout << indent_str << indent_str_next + << "Area = " << router->crossbar.area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str << indent_str_next << "Peak Dynamic = " + << router->crossbar.power.readOp.dynamic * nocdynp.clockRate * + nocdynp.min_ports * M + << " W" << endl; + cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? router->crossbar.power.readOp.longer_channel_leakage + : router->crossbar.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? router->crossbar.power.readOp + .power_gated_with_long_channel_leakage + : router->crossbar.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << indent_str_next + << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage + << " W" << endl; + cout << indent_str << indent_str_next << "Runtime Dynamic = " + << router->crossbar.rt_power.readOp.dynamic / nocdynp.executionTime + << " W" << endl; + cout << endl; + cout << indent_str << indent_str << "Arbiter:" << endl; + cout << indent_str << indent_str_next << "Peak Dynamic = " + << router->arbiter.power.readOp.dynamic * nocdynp.clockRate * + nocdynp.min_ports * M + << " W" << endl; + cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? 
router->arbiter.power.readOp.longer_channel_leakage + : router->arbiter.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? router->arbiter.power.readOp + .power_gated_with_long_channel_leakage + : router->arbiter.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << indent_str_next + << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage + << " W" << endl; + cout << indent_str << indent_str_next << "Runtime Dynamic = " + << router->arbiter.rt_power.readOp.dynamic / nocdynp.executionTime + << " W" << endl; + cout << endl; + } + } + if (link_bus_exist) { + cout << indent_str << (nocdynp.type ? "Per Router " : "") << link_name + << ": " << endl; + cout << indent_str_next + << "Area = " << link_bus_tot_per_Router.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << link_bus_tot_per_Router.power.readOp.dynamic * nocdynp.clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? link_bus_tot_per_Router.power.readOp.longer_channel_leakage + : link_bus_tot_per_Router.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
link_bus_tot_per_Router.power.readOp + .power_gated_with_long_channel_leakage + : link_bus_tot_per_Router.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " + << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << link_bus->rt_power.readOp.dynamic / nocdynp.executionTime << " W" + << endl; + cout << endl; + } + } else { + // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = + //" + //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " + // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << + // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next + //<< "Load Store Unit Peak Dynamic = " << + // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << + // lsu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Load Store Unit Gate Leakage = " << + // lsu->rt_power.readOp.gate_leakage + //<< " W" << endl; cout << indent_str_next << "Memory Management Unit + // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << + // endl; cout << indent_str_next << "Memory Management Unit Subthreshold + // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout + // << indent_str_next << "Memory Management Unit Gate Leakage = " << + // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Execution Unit Peak Dynamic = " << + // exu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Execution Unit Subthreshold Leakage = " << + // exu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Execution Unit Gate Leakage = " << + // 
exu->rt_power.readOp.gate_leakage + //<< " W" << endl; + } } -void NoC::set_noc_param() -{ - - nocdynp.type = XML->sys.NoC[ithNoC].type; - nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate; - nocdynp.clockRate *= 1e6; - nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits; - if (nocdynp.type) - { - nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports; - nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1 - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1); - /* - * Except local i/o ports, all ports needs links( global_linked_ports); - * However only min_ports can be fully active simultaneously - * since the fewer number of ports (input or output ) is the bottleneck. - */ - } - else - { - nocdynp.input_ports = 1; - nocdynp.output_ports = 1; - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = 1; - } - - nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port; - nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc; - - nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes; - nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes; - nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes; - nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle; - nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link; - nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput; - nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency; - nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage; - nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc; - - assert (nocdynp.chip_coverage <=1); - assert (nocdynp.route_over_perc <=1); - - if (nocdynp.type) - name = "NOC"; - else - name = "BUSES"; - - if ( 
XML->sys.NoC[ithNoC].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.NoC[ithNoC].vdd; - interface_ip.lop_Vdd = XML->sys.NoC[ithNoC].vdd; - interface_ip.lstp_Vdd = XML->sys.NoC[ithNoC].vdd; - } - - if ( XML->sys.NoC[ithNoC].power_gating_vcc > -1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.NoC[ithNoC].power_gating_vcc; - - } - +void NoC::set_noc_param() { + + nocdynp.type = XML->sys.NoC[ithNoC].type; + nocdynp.clockRate = XML->sys.NoC[ithNoC].clockrate; + nocdynp.clockRate *= 1e6; + nocdynp.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + + nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits; + if (nocdynp.type) { + nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports; + nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports; // later minus 1 + nocdynp.min_ports = min(nocdynp.input_ports, nocdynp.output_ports); + nocdynp.global_linked_ports = + (nocdynp.input_ports - 1) + (nocdynp.output_ports - 1); + /* + * Except local i/o ports, all ports needs links( global_linked_ports); + * However only min_ports can be fully active simultaneously + * since the fewer number of ports (input or output ) is the bottleneck. 
+ */ + } else { + nocdynp.input_ports = 1; + nocdynp.output_ports = 1; + nocdynp.min_ports = min(nocdynp.input_ports, nocdynp.output_ports); + nocdynp.global_linked_ports = 1; + } + + nocdynp.virtual_channel_per_port = + XML->sys.NoC[ithNoC].virtual_channel_per_port; + nocdynp.input_buffer_entries_per_vc = + XML->sys.NoC[ithNoC].input_buffer_entries_per_vc; + + nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes; + nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes; + nocdynp.total_nodes = nocdynp.horizontal_nodes * nocdynp.vertical_nodes; + nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle; + nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link; + nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput; + nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency; + nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage; + nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc; + + assert(nocdynp.chip_coverage <= 1); + assert(nocdynp.route_over_perc <= 1); + + if (nocdynp.type) + name = "NOC"; + else + name = "BUSES"; + + if (XML->sys.NoC[ithNoC].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.NoC[ithNoC].vdd; + interface_ip.lop_Vdd = XML->sys.NoC[ithNoC].vdd; + interface_ip.lstp_Vdd = XML->sys.NoC[ithNoC].vdd; + } + + if (XML->sys.NoC[ithNoC].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.NoC[ithNoC].power_gating_vcc; + } } +NoC ::~NoC() { -NoC ::~NoC(){ - - if(router) {delete router; router = 0;} - if(link_bus) {delete link_bus; link_bus = 0;} + if (router) { + delete router; + router = 0; + } + if (link_bus) { + delete link_bus; + link_bus = 0; + } } diff --git a/noc.h b/noc.h index 639ec26..bacb0cd 100644 --- a/noc.h +++ b/noc.h @@ -32,44 +32,45 @@ #ifndef NOC_H_ #define NOC_H_ #include "XML_Parse.h" -#include "logic.h" -#include 
"parameter.h" #include "array.h" -#include "interconnect.h" #include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" #include "router.h" -class NoC :public Component { - public: - - ParseXML *XML; - int ithNoC; - InputParameter interface_ip; - double link_len; - double executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - Router * router; - interconnect * link_bus; - NoCParam nocdynp; - uca_org_t local_result; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - Component link_bus_tot_per_Router; - bool link_bus_exist; - bool router_exist; - string name, link_name; - double M_traffic_pattern; - NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0); - void set_noc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void init_link_bus(double link_len_); - void init_router(); - void computeEnergy_link_bus(bool is_tdp=true); - void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~NoC(); +class NoC : public Component { +public: + ParseXML *XML; + int ithNoC; + InputParameter interface_ip; + double link_len; + double executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + Router *router; + interconnect *link_bus; + NoCParam nocdynp; + uca_org_t local_result; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + Component link_bus_tot_per_Router; + bool link_bus_exist; + bool router_exist; + string name, link_name; + double M_traffic_pattern; + NoC(ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, + double M_traffic_pattern_ = 0.6, double link_len_ = 0); + void set_noc_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void init_link_bus(double 
link_len_); + void init_router(); + void computeEnergy_link_bus(bool is_tdp = true); + void displayEnergy_link_bus(uint32_t indent = 0, int plevel = 100, + bool is_tdp = true); + ~NoC(); }; #endif /* NOC_H_ */ diff --git a/processor.cc b/processor.cc index 2c01da5..8c5cdde 100644 --- a/processor.cc +++ b/processor.cc @@ -28,851 +28,1044 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include -#include "parameter.h" +#include "processor.h" + +#include "XML_Parse.h" #include "array.h" -#include "const.h" #include "basic_circuit.h" -#include "XML_Parse.h" -#include "processor.h" +#include "const.h" +#include "parameter.h" #include "version.h" +#include +#include +#include +#include +#include +#include +#include Processor::Processor(ParseXML *XML_interface) -:XML(XML_interface),//TODO: using one global copy may have problems. - mc(0), - niu(0), - pcie(0), - flashcontroller(0) -{ + : XML(XML_interface), // TODO: using one global copy may have problems. + mc(0), niu(0), pcie(0), flashcontroller(0) { /* - * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - * There is no point to have heterogeneous memory controller on chip, - * thus McPAT only support homogeneous memory controllers. + * placement and routing overhead is 10%, core scales worse than cache 40% is + * accumulated from 90 to 22nm There is no point to have heterogeneous memory + * controller on chip, thus McPAT only support homogeneous memory controllers. */ int i; - double pppm_t[4] = {1,1,1,1}; + double pppm_t[4] = {1, 1, 1, 1}; set_proc_param(); if (procdynp.homoCore) - numCore = procdynp.numCore==0? 0:1; + numCore = procdynp.numCore == 0 ? 0 : 1; else - numCore = procdynp.numCore; + numCore = procdynp.numCore; if (procdynp.homoL2) - numL2 = procdynp.numL2==0? 
0:1; + numL2 = procdynp.numL2 == 0 ? 0 : 1; else - numL2 = procdynp.numL2; + numL2 = procdynp.numL2; - if (XML->sys.Private_L2 && numCore != numL2) - { - cout<<"Number of private L2 does not match number of cores"<sys.Private_L2 && numCore != numL2) { + cout << "Number of private L2 does not match number of cores" << endl; + exit(0); } if (procdynp.homoL3) - numL3 = procdynp.numL3==0? 0:1; + numL3 = procdynp.numL3 == 0 ? 0 : 1; else - numL3 = procdynp.numL3; + numL3 = procdynp.numL3; if (procdynp.homoNOC) - numNOC = procdynp.numNOC==0? 0:1; + numNOC = procdynp.numNOC == 0 ? 0 : 1; else - numNOC = procdynp.numNOC; + numNOC = procdynp.numNOC; -// if (!procdynp.homoNOC) -// { -// cout<<"Current McPAT does not support heterogeneous NOC"<computeEnergy(); - cores[i]->computeEnergy(false); - if (procdynp.homoCore){ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore); - set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.power = core.power + cores[i]->power*pppm_t; - set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - power = power + core.power; - rt_power = rt_power + core.rt_power; - } - else{ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); - area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1); - core.power = core.power + cores[i]->power*pppm_t; - power = power + cores[i]->power*pppm_t; - - set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - rt_power = rt_power + 
cores[i]->rt_power*pppm_t; - } - } + for (i = 0; i < numCore; i++) { + cores.push_back(new Core(XML, i, &interface_ip)); + cores[i]->computeEnergy(); + cores[i]->computeEnergy(false); + if (procdynp.homoCore) { + core.area.set_area(core.area.get_area() + + cores[i]->area.get_area() * procdynp.numCore); + set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, + procdynp.numCore, procdynp.numCore); + core.power = core.power + cores[i]->power * pppm_t; + set_pppm(pppm_t, 1 / cores[i]->executionTime, procdynp.numCore, + procdynp.numCore, procdynp.numCore); + core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; + area.set_area(area.get_area() + + core.area.get_area()); // placement and routing overhead is + // 10%, core scales worse than cache + // 40% is accumulated from 90 to 22nm + power = power + core.power; + rt_power = rt_power + core.rt_power; + } else { + core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); + area.set_area( + area.get_area() + + cores[i]->area.get_area()); // placement and routing overhead is 10%, + // core scales worse than cache 40% is + // accumulated from 90 to 22nm - if (!XML->sys.Private_L2) - { - if (numL2 >0) - for (i = 0;i < numL2; i++) - { - l2array.push_back(new SharedCache(XML,i, &interface_ip)); - l2array[i]->computeEnergy(); - l2array[i]->computeEnergy(false); - if (procdynp.homoL2){ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2); - set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.power = l2.power + l2array[i]->power*pppm_t; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2.power; - rt_power = rt_power + 
l2.rt_power; - } - else{ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); - area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i]->power*pppm_t; - power = power + l2array[i]->power*pppm_t;; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - rt_power = rt_power + l2array[i]->rt_power*pppm_t; - } - } + set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1); + core.power = core.power + cores[i]->power * pppm_t; + power = power + cores[i]->power * pppm_t; + + set_pppm(pppm_t, 1 / cores[i]->executionTime, 1, 1, 1); + core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; + rt_power = rt_power + cores[i]->rt_power * pppm_t; + } } - if (numL3 >0) - for (i = 0;i < numL3; i++) - { - l3array.push_back(new SharedCache(XML,i, &interface_ip, L3)); - l3array[i]->computeEnergy(); - l3array[i]->computeEnergy(false); - if (procdynp.homoL3){ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3); - set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.power = l3.power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l3.power; - rt_power = rt_power + l3.rt_power; - - } - else{ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); - area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 
90 to 22nm - set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i]->power*pppm_t; - power = power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1); - l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; - rt_power = rt_power + l3array[i]->rt_power*pppm_t; - - } - } - if (numL1Dir >0) - for (i = 0;i < numL1Dir; i++) - { - l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory)); - l1dirarray[i]->computeEnergy(); - l1dirarray[i]->computeEnergy(false); - if (procdynp.homoL1Dir){ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l1dir.power; - rt_power = rt_power + l1dir.rt_power; - - } - else{ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1); - l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - power = power + l1dirarray[i]->power; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l1dirarray[i]->rt_power; - } - } - - if (numL2Dir >0) - for (i = 0;i < numL2Dir; i++) - { - l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory)); - l2dirarray[i]->computeEnergy(); - l2dirarray[i]->computeEnergy(false); - if 
(procdynp.homoL2Dir){ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2dir.power; - rt_power = rt_power + l2dir.rt_power; - - } - else{ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - power = power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t; - } - } - - if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc = new MemoryController(XML, &interface_ip, MC); - mc->computeEnergy(); - mc->computeEnergy(false); - mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.power = mc->power*pppm_t; - power = power + mcs.power; - set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.rt_power = mc->rt_power*pppm_t; - rt_power = rt_power + mcs.rt_power; + if (!XML->sys.Private_L2) 
{ + if (numL2 > 0) { + for (i = 0; i < numL2; i++) { + l2array.push_back(new SharedCache(XML, i, &interface_ip)); + l2array[i]->computeEnergy(); + l2array[i]->computeEnergy(false); + if (procdynp.homoL2) { + l2.area.set_area(l2.area.get_area() + + l2array[i]->area.get_area() * procdynp.numL2); + set_pppm(pppm_t, l2array[i]->cachep.clockRate * procdynp.numL2, + procdynp.numL2, procdynp.numL2, procdynp.numL2); + l2.power = l2.power + l2array[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, procdynp.numL2, + procdynp.numL2, procdynp.numL2); + l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l2.area.get_area()); // placement and routing overhead is 10%, l2 + // scales worse than cache 40% is accumulated + // from 90 to 22nm + power = power + l2.power; + rt_power = rt_power + l2.rt_power; + } else { + l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); + area.set_area( + area.get_area() + + l2array[i] + ->area.get_area()); // placement and routing overhead is + // 10%, l2 scales worse than cache + // 40% is accumulated from 90 to 22nm + set_pppm(pppm_t, l2array[i]->cachep.clockRate, 1, 1, 1); + l2.power = l2.power + l2array[i]->power * pppm_t; + power = power + l2array[i]->power * pppm_t; + ; + set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1); + l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; + rt_power = rt_power + l2array[i]->rt_power * pppm_t; + } + } + } } - if (XML->sys.flashc.number_mcs >0 )//flash controller - { - flashcontroller = new FlashController(XML, &interface_ip); - flashcontroller->computeEnergy(); - flashcontroller->computeEnergy(false); - double number_fcs = flashcontroller->fcp.num_mcs; - flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs); - area.set_area(area.get_area()+flashcontrollers.area.get_area()); - set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs ); - 
flashcontrollers.power = flashcontroller->power*pppm_t; - power = power + flashcontrollers.power; - set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs ); - flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t; - rt_power = rt_power + flashcontrollers.rt_power; + if (numL3 > 0) { + for (i = 0; i < numL3; i++) { + l3array.push_back(new SharedCache(XML, i, &interface_ip, L3)); + l3array[i]->computeEnergy(); + l3array[i]->computeEnergy(false); + if (procdynp.homoL3) { + l3.area.set_area(l3.area.get_area() + + l3array[i]->area.get_area() * procdynp.numL3); + set_pppm(pppm_t, l3array[i]->cachep.clockRate * procdynp.numL3, + procdynp.numL3, procdynp.numL3, procdynp.numL3); + l3.power = l3.power + l3array[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, procdynp.numL3, + procdynp.numL3, procdynp.numL3); + l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; + area.set_area(area.get_area() + + l3.area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache + // 40% is accumulated from 90 to 22nm + power = power + l3.power; + rt_power = rt_power + l3.rt_power; + } else { + l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); + area.set_area( + area.get_area() + + l3array[i]->area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache 40% + // is accumulated from 90 to 22nm + set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1); + l3.power = l3.power + l3array[i]->power * pppm_t; + power = power + l3array[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1); + l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; + rt_power = rt_power + l3array[i]->rt_power * pppm_t; + } + } } + if (numL1Dir > 0) { + for (i = 0; i < numL1Dir; i++) { + l1dirarray.push_back(new SharedCache(XML, i, &interface_ip, L1Directory)); + l1dirarray[i]->computeEnergy(); + l1dirarray[i]->computeEnergy(false); + if (procdynp.homoL1Dir) 
{ + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i]->area.get_area() * procdynp.numL1Dir); + set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, + procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); + l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, + procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); + l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l1dir.area.get_area()); // placement and routing overhead is 10%, + // l1dir scales worse than cache 40% is + // accumulated from 90 to 22nm + power = power + l1dir.power; + rt_power = rt_power + l1dir.rt_power; - if (XML->sys.niu.number_units >0) - { - niu = new NIUController(XML, &interface_ip); - niu->computeEnergy(); - niu->computeEnergy(false); - nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.power = niu->power*pppm_t; - power = power + nius.power; - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.rt_power = niu->rt_power*pppm_t; - rt_power = rt_power + nius.rt_power; + } else { + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i]->area.get_area()); + area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); + set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1); + l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; + power = power + l1dirarray[i]->power; + set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1); + l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; + rt_power = rt_power + l1dirarray[i]->rt_power; + } + 
} + } + if (numL2Dir > 0) + for (i = 0; i < numL2Dir; i++) { + l2dirarray.push_back(new SharedCache(XML, i, &interface_ip, L2Directory)); + l2dirarray[i]->computeEnergy(); + l2dirarray[i]->computeEnergy(false); + if (procdynp.homoL2Dir) { + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i]->area.get_area() * procdynp.numL2Dir); + set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, + procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); + l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, + procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); + l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l2dir.area.get_area()); // placement and routing overhead is 10%, + // l2dir scales worse than cache 40% is + // accumulated from 90 to 22nm + power = power + l2dir.power; + rt_power = rt_power + l2dir.rt_power; + } else { + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i]->area.get_area()); + area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); + set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1); + l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; + power = power + l2dirarray[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1); + l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; + rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t; + } + } + + if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { + mc = new MemoryController(XML, &interface_ip, MC); + mc->computeEnergy(); + mc->computeEnergy(false); + mcs.area.set_area(mcs.area.get_area() + + mc->area.get_area() * XML->sys.mc.number_mcs); + area.set_area(area.get_area() + + mc->area.get_area() * XML->sys.mc.number_mcs); + set_pppm(pppm_t, XML->sys.mc.number_mcs * mc->mcp.clockRate, + XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, + 
XML->sys.mc.number_mcs); + mcs.power = mc->power * pppm_t; + power = power + mcs.power; + set_pppm(pppm_t, 1 / mc->mcp.executionTime, XML->sys.mc.number_mcs, + XML->sys.mc.number_mcs, XML->sys.mc.number_mcs); + mcs.rt_power = mc->rt_power * pppm_t; + rt_power = rt_power + mcs.rt_power; } - if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0) + if (XML->sys.flashc.number_mcs > 0) // flash controller { - pcie = new PCIeController(XML, &interface_ip); - pcie->computeEnergy(); - pcie->computeEnergy(false); - pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.power = pcie->power*pppm_t; - power = power + pcies.power; - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.rt_power = pcie->rt_power*pppm_t; - rt_power = rt_power + pcies.rt_power; + flashcontroller = new FlashController(XML, &interface_ip); + flashcontroller->computeEnergy(); + flashcontroller->computeEnergy(false); + double number_fcs = flashcontroller->fcp.num_mcs; + flashcontrollers.area.set_area(flashcontrollers.area.get_area() + + flashcontroller->area.get_area() * + number_fcs); + area.set_area(area.get_area() + flashcontrollers.area.get_area()); + set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); + flashcontrollers.power = flashcontroller->power * pppm_t; + power = power + flashcontrollers.power; + set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); + flashcontrollers.rt_power = flashcontroller->rt_power * pppm_t; + rt_power = rt_power + flashcontrollers.rt_power; + } + if (XML->sys.niu.number_units > 0) { + niu = new NIUController(XML, &interface_ip); + niu->computeEnergy(); 
+ niu->computeEnergy(false); + nius.area.set_area(nius.area.get_area() + + niu->area.get_area() * XML->sys.niu.number_units); + area.set_area(area.get_area() + + niu->area.get_area() * XML->sys.niu.number_units); + set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units, XML->sys.niu.number_units, + XML->sys.niu.number_units); + nius.power = niu->power * pppm_t; + power = power + nius.power; + set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units, XML->sys.niu.number_units, + XML->sys.niu.number_units); + nius.rt_power = niu->rt_power * pppm_t; + rt_power = rt_power + nius.rt_power; } - if (numNOC >0) - { - for (i = 0;i < numNOC; i++) - { - if (XML->sys.NoC[i].type) - {//First add up area of routers if NoC is used - nocs.push_back(new NoC(XML,i, &interface_ip, 1)); - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - else - {//Bus based interconnect - nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage))); - if (procdynp.homoNOC){ - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - } - - /* - * Compute global links associated with each NOC, if any. 
This must be done at the end (even after the NOC router part) since the total chip - * area must be obtain to decide the link routing - */ - for (i = 0;i < numNOC; i++) - { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) - { - nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - } - } - } - //Compute energy of NoC (w or w/o links) or buses - for (i = 0;i < numNOC; i++) - { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); - if (procdynp.homoNOC){ - set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.power = noc.power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - power = power + noc.power; - rt_power = rt_power + noc.rt_power; - } - else - { - set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power*pppm_t; - power = power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - rt_power = rt_power + nocs[i]->rt_power*pppm_t; - - - } - } + if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { + pcie = new PCIeController(XML, &interface_ip); + pcie->computeEnergy(); + 
pcie->computeEnergy(false); + pcies.area.set_area(pcies.area.get_area() + + pcie->area.get_area() * XML->sys.pcie.number_units); + area.set_area(area.get_area() + + pcie->area.get_area() * XML->sys.pcie.number_units); + set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units, XML->sys.pcie.number_units, + XML->sys.pcie.number_units); + pcies.power = pcie->power * pppm_t; + power = power + pcies.power; + set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units, XML->sys.pcie.number_units, + XML->sys.pcie.number_units); + pcies.rt_power = pcie->rt_power * pppm_t; + rt_power = rt_power + pcies.rt_power; } -// //clock power -// globalClock.init_wire_external(is_default, &interface_ip); -// globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 -// globalClock.end_wiring_level =5;//toplevel metal -// globalClock.start_wiring_level =5;//toplevel metal -// globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes -// globalClock.optimize_wire(); + if (numNOC > 0) { + for (i = 0; i < numNOC; i++) { + if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used + nocs.push_back(new NoC(XML, i, &interface_ip, 1)); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } else { // Bus based interconnect + nocs.push_back( + new NoC(XML, i, &interface_ip, 1, + sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + 
area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } + } + + /* + * Compute global links associated with each NOC, if any. This must be done + * at the end (even after the NOC router part) since the total chip area + * must be obtain to decide the link routing + */ + for (i = 0; i < numNOC; i++) { + if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { + nocs[i]->init_link_bus( + sqrt(area.get_area() * + XML->sys.NoC[i].chip_coverage)); // compute global links + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + } else { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + } + } + } + // Compute energy of NoC (w or w/o links) or buses + for (i = 0; i < numNOC; i++) { + nocs[i]->computeEnergy(); + nocs[i]->computeEnergy(false); + if (procdynp.homoNOC) { + set_pppm(pppm_t, procdynp.numNOC * nocs[i]->nocdynp.clockRate, + procdynp.numNOC, procdynp.numNOC, procdynp.numNOC); + noc.power = noc.power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, procdynp.numNOC, + procdynp.numNOC, procdynp.numNOC); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + power = power + noc.power; + rt_power = rt_power + noc.rt_power; + } else { + set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); + noc.power = noc.power + nocs[i]->power * pppm_t; + power = power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + rt_power = rt_power + nocs[i]->rt_power * pppm_t; + } + } + } 
+ // //clock power + // globalClock.init_wire_external(is_default, &interface_ip); + // globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 + // globalClock.end_wiring_level =5;//toplevel metal + // globalClock.start_wiring_level =5;//toplevel metal + // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local + // final nodes globalClock.optimize_wire(); } -void Processor::displayDeviceType(int device_type_, uint32_t indent) -{ - string indent_str(indent, ' '); - - switch ( device_type_ ) { - - case 0 : - cout <sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - if (is_tdp) - { - - if (plevel<5) - { - cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") results (current print level is "<< plevel - <<", please increase print level to see the details in components): "<sys.core_tech_node<<" nm"<sys.core[0].clock_rate<0){ - cout <sys.number_of_cores << " cores "<sys.device_type,indent); - cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
core.power.readOp.power_gated_with_long_channel_leakage : core.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl; - cout <sys.Private_L2) - { - if (numL2 >0){ - cout <sys.L2[0].device_type,indent); - cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? l2.power.readOp.power_gated_with_long_channel_leakage : l2.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L3[0].device_type, indent); - cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
l3.power.readOp.power_gated_with_long_channel_leakage : l3.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? l1dir.power.readOp.power_gated_with_long_channel_leakage : l1dir.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
l2dir.power.readOp.power_gated_with_long_channel_leakage : l2dir.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.device_type, indent); - cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? noc.power.readOp.power_gated_with_long_channel_leakage : noc.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl; - cout <sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - cout <sys.mc.number_mcs << " Memory Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
mcs.power.readOp.power_gated_with_long_channel_leakage : mcs.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl; - cout <sys.flashc.number_mcs >0) - { - cout <fcp.num_mcs << " Flash/SSD Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? flashcontrollers.power.readOp.power_gated_with_long_channel_leakage : flashcontrollers.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl; - cout <sys.niu.number_units >0 ) - { - cout <niup.num_units << " Network Interface Units "<sys.device_type, indent); - cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? 
nius.power.readOp.power_gated_with_long_channel_leakage : nius.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl; - cout <sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - cout <pciep.num_units << " PCIe Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel? pcies.power.readOp.power_gated_with_long_channel_leakage : pcies.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl; - cout <1) - { - for (i = 0;i < numCore; i++) - { - cores[i]->displayEnergy(indent+4,plevel,is_tdp); - cout <<"*****************************************************************************************"<sys.Private_L2) - { - for (i = 0;i < numL2; i++) - { - l2array[i]->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout 
<<"*****************************************************************************************"<sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0) - { - flashcontroller->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.niu.number_units >0 ) - { - niu->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - pcie->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,plevel,is_tdp); - cout <<"*****************************************************************************************"<sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + if (is_tdp) { + if (plevel < 5) { + cout + << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR << " of " + << VER_UPDATE << ") results (current print level is " << plevel + << ", please increase print level to see the details in components): " + << endl; + } else { + cout << "\nMcPAT (version " << VER_MAJOR << "." 
<< VER_MINOR << " of " + << VER_UPDATE << ") results (current print level is 5)" << endl; + } + cout << "******************************************************************" + "***********************" + << endl; + cout << indent_str << "Technology " << XML->sys.core_tech_node << " nm" + << endl; + // cout <sys.interconnect_projection_type<sys.interconnect_projection_type, indent); + cout << indent_str << "Core clock Rate(MHz) " << XML->sys.core[0].clock_rate + << endl; + cout << endl; + cout << "******************************************************************" + "***********************" + << endl; + cout << "Processor: " << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str << "Peak Power = " + << power.readOp.dynamic + + (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + + power.readOp.gate_leakage + << " W" << endl; + cout << indent_str << "Total Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + + power.readOp.gate_leakage + << " W" << endl; + cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic + << " W" << endl; + cout << endl; + if (numCore > 0) { + cout << indent_str << "Total Cores: " << XML->sys.number_of_cores + << " cores " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next << "Area = " << core.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? core.power.readOp.longer_channel_leakage + : core.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? core.power.readOp.power_gated_with_long_channel_leakage + : core.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (!XML->sys.Private_L2) { + if (numL2 > 0) { + cout << indent_str << "Total L2s: " << endl; + displayDeviceType(XML->sys.L2[0].device_type, indent); + cout << indent_str_next << "Area = " << l2.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l2.power.readOp.longer_channel_leakage + : l2.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l2.power.readOp.power_gated_with_long_channel_leakage + : l2.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + } + if (numL3 > 0) { + cout << indent_str << "Total L3s: " << endl; + displayDeviceType(XML->sys.L3[0].device_type, indent); + cout << indent_str_next << "Area = " << l3.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l3.power.readOp.longer_channel_leakage + : l3.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? l3.power.readOp.power_gated_with_long_channel_leakage + : l3.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (numL1Dir > 0) { + cout << indent_str << "Total First Level Directory: " << endl; + displayDeviceType(XML->sys.L1Directory[0].device_type, indent); + cout << indent_str_next << "Area = " << l1dir.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l1dir.power.readOp.longer_channel_leakage + : l1dir.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l1dir.power.readOp.power_gated_with_long_channel_leakage + : l1dir.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (numL2Dir > 0) { + cout << indent_str << "Total Second Level Directory: " << endl; + displayDeviceType(XML->sys.L1Directory[0].device_type, indent); + cout << indent_str_next << "Area = " << l2dir.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l2dir.power.readOp.longer_channel_leakage + : l2dir.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? l2dir.power.readOp.power_gated_with_long_channel_leakage + : l2dir.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (numNOC > 0) { + cout << indent_str << "Total NoCs (Network/Bus): " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next << "Area = " << noc.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? noc.power.readOp.longer_channel_leakage + : noc.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
noc.power.readOp.power_gated_with_long_channel_leakage + : noc.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { + cout << indent_str << "Total MCs: " << XML->sys.mc.number_mcs + << " Memory Controllers " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next << "Area = " << mcs.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? mcs.power.readOp.longer_channel_leakage + : mcs.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? mcs.power.readOp.power_gated_with_long_channel_leakage + : mcs.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (XML->sys.flashc.number_mcs > 0) { + cout << indent_str + << "Total Flash/SSD Controllers: " << flashcontroller->fcp.num_mcs + << " Flash/SSD Controllers " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next + << "Area = " << flashcontrollers.area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? 
flashcontrollers.power.readOp.longer_channel_leakage + : flashcontrollers.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? flashcontrollers.power.readOp + .power_gated_with_long_channel_leakage + : flashcontrollers.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic + << " W" << endl; + cout << endl; + } + if (XML->sys.niu.number_units > 0) { + cout << indent_str << "Total NIUs: " << niu->niup.num_units + << " Network Interface Units " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next << "Area = " << nius.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? nius.power.readOp.longer_channel_leakage + : nius.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
nius.power.readOp.power_gated_with_long_channel_leakage + : nius.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { + cout << indent_str << "Total PCIes: " << pcie->pciep.num_units + << " PCIe Controllers " << endl; + displayDeviceType(XML->sys.device_type, indent); + cout << indent_str_next << "Area = " << pcies.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? pcies.power.readOp.longer_channel_leakage + : pcies.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
pcies.power.readOp.power_gated_with_long_channel_leakage + : pcies.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next + << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" + << endl; + cout << endl; + } + cout << "******************************************************************" + "***********************" + << endl; + if (plevel > 1) { + for (i = 0; i < numCore; i++) { + cores[i]->displayEnergy(indent + 4, plevel, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + if (!XML->sys.Private_L2) { + for (i = 0; i < numL2; i++) { + l2array[i]->displayEnergy(indent + 4, is_tdp); + cout << "************************************************************" + "*****************************" + << endl; + } + } + for (i = 0; i < numL3; i++) { + l3array[i]->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + for (i = 0; i < numL1Dir; i++) { + l1dirarray[i]->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + for (i = 0; i < numL2Dir; i++) { + l2dirarray[i]->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + if (XML->sys.mc.number_mcs > 0 && + XML->sys.mc.memory_channels_per_mc > 0) { + mc->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + if (XML->sys.flashc.number_mcs > 0 && + XML->sys.flashc.memory_channels_per_mc > 0) { + flashcontroller->displayEnergy(indent + 4, is_tdp); + cout << 
"**************************************************************" + "***************************" + << endl; + } + if (XML->sys.niu.number_units > 0) { + niu->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { + pcie->displayEnergy(indent + 4, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + + for (i = 0; i < numNOC; i++) { + nocs[i]->displayEnergy(indent + 4, plevel, is_tdp); + cout << "**************************************************************" + "***************************" + << endl; + } + } + } else { + } } -void Processor::set_proc_param() -{ - bool debug = false; - - procdynp.homoCore = bool(debug?1:XML->sys.homogeneous_cores); - procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s); - procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s); - procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs); - procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories); - procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories); - - procdynp.numCore = XML->sys.number_of_cores; - procdynp.numL2 = XML->sys.number_of_L2s; - procdynp.numL3 = XML->sys.number_of_L3s; - procdynp.numNOC = XML->sys.number_of_NoCs; - procdynp.numL1Dir = XML->sys.number_of_L1Directories; - procdynp.numL2Dir = XML->sys.number_of_L2Directories; - procdynp.numMC = XML->sys.mc.number_mcs; - procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc; - -// if (procdynp.numCore<1) -// { -// cout<<" The target processor should at least have one core on chip." 
<2) - // { - // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<sys.device_type; - interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - - interface_ip.specific_hp_vdd = false; - interface_ip.specific_lop_vdd = false; - interface_ip.specific_lstp_vdd = false; - - interface_ip.specific_vcc_min = false; - - interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type; - interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. - interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_wt = 0; - interface_ip.cycle_time_wt = 0; - - interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied. - interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_dev = 10000; - interface_ip.cycle_time_dev = 10000; - - interface_ip.ed = 2; - interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately - interface_ip.int_prefetch_w = 1; - interface_ip.page_sz_bits = 0; - interface_ip.temp = debug?360: XML->sys.temperature; - interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node; - interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; - - //***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors. 
- //They will be overridden during each components initialization - interface_ip.cache_sz =64; - interface_ip.line_sz = 1; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = 64; - interface_ip.access_mode = 2; - - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - - interface_ip.is_main_mem = false; - interface_ip.rpters_in_htree = true ; - interface_ip.ver_htree_wires_over_array = 0; - interface_ip.broadcast_addr_din_over_ver_htrees = 0; - - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - interface_ip.nuca = 0; - interface_ip.nuca_bank_count = 0; - interface_ip.is_cache =true; - interface_ip.pure_ram =false; - interface_ip.pure_cam =false; - interface_ip.force_cache_config =false; - interface_ip.power_gating =XML->sys.power_gating; - - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - interface_ip.force_wiretype = false; - interface_ip.print_detail = 1; - interface_ip.add_ecc_b_ =true; +void Processor::set_proc_param() { + bool debug = false; + + procdynp.homoCore = bool(debug ? 1 : XML->sys.homogeneous_cores); + procdynp.homoL2 = bool(debug ? 1 : XML->sys.homogeneous_L2s); + procdynp.homoL3 = bool(debug ? 1 : XML->sys.homogeneous_L3s); + procdynp.homoNOC = bool(debug ? 1 : XML->sys.homogeneous_NoCs); + procdynp.homoL1Dir = bool(debug ? 1 : XML->sys.homogeneous_L1Directories); + procdynp.homoL2Dir = bool(debug ? 
1 : XML->sys.homogeneous_L2Directories); + + procdynp.numCore = XML->sys.number_of_cores; + procdynp.numL2 = XML->sys.number_of_L2s; + procdynp.numL3 = XML->sys.number_of_L3s; + procdynp.numNOC = XML->sys.number_of_NoCs; + procdynp.numL1Dir = XML->sys.number_of_L1Directories; + procdynp.numL2Dir = XML->sys.number_of_L2Directories; + procdynp.numMC = XML->sys.mc.number_mcs; + procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc; + + // if (procdynp.numCore<1) + // { + // cout<<" The target processor should at least have one core on + // chip." + //<2) + // { + // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global + // and local NOCs)"<sys.device_type; + interface_ip.data_arr_peri_global_tech_type = + debug ? 0 : XML->sys.device_type; + interface_ip.tag_arr_ram_cell_tech_type = debug ? 0 : XML->sys.device_type; + interface_ip.tag_arr_peri_global_tech_type = debug ? 0 : XML->sys.device_type; + + interface_ip.specific_hp_vdd = false; + interface_ip.specific_lop_vdd = false; + interface_ip.specific_lstp_vdd = false; + + interface_ip.specific_vcc_min = false; + + interface_ip.ic_proj_type = debug ? 0 : XML->sys.interconnect_projection_type; + interface_ip.delay_wt = + 100; // Fixed number, make sure timing can be satisfied. + interface_ip.area_wt = 0; // Fixed number, This is used to exhaustive search + // for individual components. + interface_ip.dynamic_power_wt = + 100; // Fixed number, This is used to exhaustive search for individual + // components. + interface_ip.leakage_power_wt = 0; + interface_ip.cycle_time_wt = 0; + + interface_ip.delay_dev = + 10000; // Fixed number, make sure timing can be satisfied. + interface_ip.area_dev = 10000; // Fixed number, This is used to exhaustive + // search for individual components. + interface_ip.dynamic_power_dev = + 10000; // Fixed number, This is used to exhaustive search for individual + // components. 
+ interface_ip.leakage_power_dev = 10000; + interface_ip.cycle_time_dev = 10000; + + interface_ip.ed = 2; + interface_ip.burst_len = 1; // parameters are fixed for processor section, + // since memory is processed separately + interface_ip.int_prefetch_w = 1; + interface_ip.page_sz_bits = 0; + interface_ip.temp = debug ? 360 : XML->sys.temperature; + interface_ip.F_sz_nm = + debug ? 90 : XML->sys.core_tech_node; // XML->sys.core_tech_node; + interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; + + //***********This section of code does not have real meaning, they are just to + // ensure all data will have initial value to prevent errors. They will be + // overridden during each components initialization + interface_ip.cache_sz = 64; + interface_ip.line_sz = 1; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = 64; + interface_ip.access_mode = 2; + + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + + interface_ip.is_main_mem = false; + interface_ip.rpters_in_htree = true; + interface_ip.ver_htree_wires_over_array = 0; + interface_ip.broadcast_addr_din_over_ver_htrees = 0; + + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 1; + interface_ip.nuca = 0; + interface_ip.nuca_bank_count = 0; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.force_cache_config = false; + interface_ip.power_gating = XML->sys.power_gating; + + if (XML->sys.Embedded) { + interface_ip.wt = Global_30; + interface_ip.wire_is_mat_type = 0; + interface_ip.wire_os_mat_type = 0; + } else { + interface_ip.wt = Global; + interface_ip.wire_is_mat_type = 2; + interface_ip.wire_os_mat_type = 2; + } + 
interface_ip.force_wiretype = false; + interface_ip.print_detail = 1; + interface_ip.add_ecc_b_ = true; } -Processor::~Processor(){ - while (!cores.empty()) - { - delete cores.back(); - cores.pop_back(); - } - while (!l2array.empty()) - { - delete l2array.back(); - l2array.pop_back(); - } - while (!l3array.empty()) - { - delete l3array.back(); - l3array.pop_back(); - } - while (!nocs.empty()) - { - delete nocs.back(); - nocs.pop_back(); - } - while (!l1dirarray.empty()) - { - delete l1dirarray.back(); - l1dirarray.pop_back(); - } - while (!l2dirarray.empty()) - { - delete l2dirarray.back(); - l2dirarray.pop_back(); - } - if (mc) - { - delete mc; - mc=0; - } - if (niu) - { - delete niu; - niu =0; - } - if (pcie) - { - delete pcie; - pcie=0; - } - if (flashcontroller) - { - delete flashcontroller; - flashcontroller = 0; - } +Processor::~Processor() { + while (!cores.empty()) { + delete cores.back(); + cores.pop_back(); + } + while (!l2array.empty()) { + delete l2array.back(); + l2array.pop_back(); + } + while (!l3array.empty()) { + delete l3array.back(); + l3array.pop_back(); + } + while (!nocs.empty()) { + delete nocs.back(); + nocs.pop_back(); + } + while (!l1dirarray.empty()) { + delete l1dirarray.back(); + l1dirarray.pop_back(); + } + while (!l2dirarray.empty()) { + delete l2dirarray.back(); + l2dirarray.pop_back(); + } + if (mc) { + delete mc; + mc = 0; + } + if (niu) { + delete niu; + niu = 0; + } + if (pcie) { + delete pcie; + pcie = 0; + } + if (flashcontroller) { + delete flashcontroller; + flashcontroller = 0; + } }; diff --git a/processor.h b/processor.h index baf05f3..d0d5a6c 100644 --- a/processor.h +++ b/processor.h @@ -32,47 +32,48 @@ #define PROCESSOR_H_ #include "XML_Parse.h" +#include "arbiter.h" #include "area.h" -#include "decoder.h" -#include "parameter.h" #include "array.h" -#include "arbiter.h" -#include #include "basic_components.h" #include "core.h" +#include "decoder.h" +#include "iocontrollers.h" #include "memoryctrl.h" +#include "noc.h" 
+#include "parameter.h" #include "router.h" #include "sharedcache.h" -#include "noc.h" -#include "iocontrollers.h" -class Processor : public Component -{ - public: - ParseXML *XML; - vector cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; - vector nocs; - MemoryController * mc; - NIUController * niu; - PCIeController * pcie; - FlashController * flashcontroller; - InputParameter interface_ip; - ProcParam procdynp; - //wire globalInterconnect; - //clock_network globalClock; - Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface); - void compute(); - void set_proc_param(); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void displayDeviceType(int device_type_, uint32_t indent = 0); - void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); - ~Processor(); +#include + +class Processor : public Component { +public: + ParseXML *XML; + vector cores; + vector l2array; + vector l3array; + vector l1dirarray; + vector l2dirarray; + vector nocs; + MemoryController *mc; + NIUController *niu; + PCIeController *pcie; + FlashController *flashcontroller; + InputParameter interface_ip; + ProcParam procdynp; + // wire globalInterconnect; + // clock_network globalClock; + Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies, + flashcontrollers; + int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; + Processor(ParseXML *XML_interface); + void compute(); + void set_proc_param(); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void displayDeviceType(int device_type_, uint32_t indent = 0); + void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); + ~Processor(); }; #endif /* PROCESSOR_H_ */ diff --git a/sharedcache.cc b/sharedcache.cc index c5e9b42..deb9f60 100644 --- a/sharedcache.cc +++ b/sharedcache.cc @@ -29,256 
+29,263 @@ * ***************************************************************************/ -#include "io.h" -#include "parameter.h" +#include "sharedcache.h" + +#include "XML_Parse.h" +#include "arbiter.h" #include "array.h" +#include "basic_circuit.h" #include "const.h" +#include "io.h" #include "logic.h" -#include "basic_circuit.h" -#include "arbiter.h" -#include -#include +#include "parameter.h" + #include -#include "XML_Parse.h" -#include -#include #include -#include "sharedcache.h" +#include +#include +#include -SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip_, enum cache_level cacheL_) -:XML(XML_interface), - ithCache(ithCache_), - interface_ip(*interface_ip_), - cacheL(cacheL_), - dir_overhead(0) -{ +SharedCache::SharedCache(ParseXML *XML_interface, int ithCache_, + InputParameter *interface_ip_, + enum cache_level cacheL_) + : XML(XML_interface), ithCache(ithCache_), interface_ip(*interface_ip_), + cacheL(cacheL_), dir_overhead(0) { int idx; int tag, data; bool is_default, debug; enum Device_ty device_t; - enum Core_type core_t; + enum Core_type core_t; double size, line, assoc, banks; - if (cacheL==L2 && XML->sys.Private_L2) - { - device_t=Core_device; - core_t = (enum Core_type)XML->sys.core[ithCache].machine_type; - } - else - { - device_t=LLC_device; - core_t = Inorder; + if (cacheL == L2 && XML->sys.Private_L2) { + device_t = Core_device; + core_t = (enum Core_type)XML->sys.core[ithCache].machine_type; + } else { + device_t = LLC_device; + core_t = Inorder; } - - debug = false; - is_default=true;//indication for default setup - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_cache_param(); - - //All lower level cache are physically indexed and tagged. 
- size = cachep.capacity; - line = cachep.blockW; - assoc = cachep.assoc; - banks = cachep.nbanks; - if ((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)) - { - assoc = 0; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 1; + debug = false; + is_default = true; // indication for default setup + if (XML->sys.Embedded) { + interface_ip.wt = Global_30; + interface_ip.wire_is_mat_type = 0; + interface_ip.wire_os_mat_type = 1; + } else { + interface_ip.wt = Global; + interface_ip.wire_is_mat_type = 2; + interface_ip.wire_os_mat_type = 2; } - else - { - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 0; - if (cachep.dir_ty==SBT) - { - dir_overhead = ceil(XML->sys.number_of_cores/8.0)*8/(cachep.blockW*8); - line = cachep.blockW*(1+ dir_overhead) ; - size = cachep.capacity*(1+ dir_overhead); + set_cache_param(); - } + // All lower level cache are physically indexed and tagged. + size = cachep.capacity; + line = cachep.blockW; + assoc = cachep.assoc; + banks = cachep.nbanks; + if ((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory)) { + assoc = 0; + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + interface_ip.num_search_ports = 1; + } else { + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + tag = debug ? 
51 + : XML->sys.physical_address_width - idx - + int(ceil(log2(line))) + EXTRA_TAG_BITS; + interface_ip.num_search_ports = 0; + if (cachep.dir_ty == SBT) { + dir_overhead = + ceil(XML->sys.number_of_cores / 8.0) * 8 / (cachep.blockW * 8); + line = cachep.blockW * (1 + dir_overhead); + size = cachep.capacity * (1 + dir_overhead); + } } -// if (XML->sys.first_level_dir==2) -// tag += int(XML->sys.domain_size + 5); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = (int)size; - interface_ip.line_sz = (int)line; - interface_ip.assoc = (int)assoc; - interface_ip.nbanks = (int)banks; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 1; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; + // if (XML->sys.first_level_dir==2) + // tag += int(XML->sys.domain_size + 5); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = (int)size; + interface_ip.line_sz = (int)line; + interface_ip.assoc = (int)assoc; + interface_ip.nbanks = (int)banks; + interface_ip.out_w = interface_ip.line_sz * 8 / 2; + interface_ip.access_mode = 1; + interface_ip.throughput = cachep.throughput; + interface_ip.latency = cachep.latency; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; -// interface_ip.force_cache_config =true; -// interface_ip.ndwl = 4; -// interface_ip.ndbl = 8; -// interface_ip.nspd = 1; -// interface_ip.ndcm =1 ; -// interface_ip.ndsam1 =1; -// interface_ip.ndsam2 =1; - unicache.caches = new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.caches->local_result.area); - area.set_area(area.get_area()+ unicache.caches->local_result.area); - interface_ip.force_cache_config =false; - - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = cachep.missb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput;//means cycle time - interface_ip.latency = cachep.latency;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - unicache.missb = new ArrayST(&interface_ip, cachep.name + "MissB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.missb->local_result.area); - area.set_area(area.get_area()+ 
unicache.missb->local_result.area); - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*cachep.fu_size ; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.ifb->local_result.area); - area.set_area(area.get_area()+ unicache.ifb->local_result.area); - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = unicache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = cachep.prefetchb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.prefetchb->local_result.area); - area.set_area(area.get_area()+ unicache.prefetchb->local_result.area); - //WBB - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = cachep.wbb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.wbb = new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.wbb->local_result.area); - area.set_area(area.get_area()+ unicache.wbb->local_result.area); + 
interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // lower level cache usually has one port. + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + // interface_ip.force_cache_config =true; + // interface_ip.ndwl = 4; + // interface_ip.ndbl = 8; + // interface_ip.nspd = 1; + // interface_ip.ndcm =1 ; + // interface_ip.ndsam1 =1; + // interface_ip.ndsam2 =1; + unicache.caches = + new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t); + unicache.area.set_area(unicache.area.get_area() + + unicache.caches->local_result.area); + area.set_area(area.get_area() + unicache.caches->local_result.area); + interface_ip.force_cache_config = false; + + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + + unicache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = cachep.missb_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8 / 2; + interface_ip.access_mode = 0; + interface_ip.throughput = cachep.throughput; // means cycle time + interface_ip.latency = cachep.latency; // means access time + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 1; + unicache.missb = new ArrayST(&interface_ip, cachep.name + "MissB", 
device_t, + true, core_t); + unicache.area.set_area(unicache.area.get_area() + + unicache.missb->local_result.area); + area.set_area(area.get_area() + unicache.missb->local_result.area); + // fill buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = unicache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = data * cachep.fu_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8 / 2; + interface_ip.access_mode = 0; + interface_ip.throughput = cachep.throughput; + interface_ip.latency = cachep.latency; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, + true, core_t); + unicache.area.set_area(unicache.area.get_area() + + unicache.ifb->local_result.area); + area.set_area(area.get_area() + unicache.ifb->local_result.area); + // prefetch buffer + tag = XML->sys.physical_address_width + + EXTRA_TAG_BITS; // check with previous entries to decide wthether to + // merge. + data = unicache.caches->l_ip + .line_sz; // separate queue to prevent from cache polution. 
+ interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = cachep.prefetchb_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8 / 2; + interface_ip.access_mode = 0; + interface_ip.throughput = cachep.throughput; + interface_ip.latency = cachep.latency; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", + device_t, true, core_t); + unicache.area.set_area(unicache.area.get_area() + + unicache.prefetchb->local_result.area); + area.set_area(area.get_area() + unicache.prefetchb->local_result.area); + // WBB + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = unicache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; + interface_ip.cache_sz = cachep.wbb_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8 / 2; + interface_ip.access_mode = 0; + interface_ip.throughput = cachep.throughput; + interface_ip.latency = cachep.latency; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + unicache.wbb = + new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t); + unicache.area.set_area(unicache.area.get_area() + + unicache.wbb->local_result.area); + area.set_area(area.get_area() + 
unicache.wbb->local_result.area); } // //pipeline -// interface_ip.pipeline_stages = int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + llCache.caches.l_ip.tag_w ; -// pipeLogicCache.init_pipeline(is_default, &interface_ip); -// pipeLogicCache.compute_pipeline(); + // interface_ip.pipeline_stages = + // int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time)); + // interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + + // llCache.caches.l_ip.tag_w ; pipeLogicCache.init_pipeline(is_default, + // &interface_ip); pipeLogicCache.compute_pipeline(); /* if (!((XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==1) - ||(XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==2)))//not single level IC and DIC + ||(XML->sys.number_of_dir_levels==1 && + XML->sys.first_level_dir ==2)))//not single level IC and DIC { //directory Now assuming one directory per bank, TODO:should change it later size = XML->sys.L2directory.L2Dir_config[0]; line = XML->sys.L2directory.L2Dir_config[1]; assoc = XML->sys.L2directory.L2Dir_config[2]; banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 0; - interface_ip.tag_w = tag; + tag = + debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit + over estimate interface_ip.specific_tag = 0; interface_ip.tag_w = tag; interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = 
XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. + interface_ip.access_mode = + 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; + interface_ip.throughput = + XML->sys.L2directory.L2Dir_config[4]/clockRate; interface_ip.latency = + XML->sys.L2directory.L2Dir_config[5]/clockRate; interface_ip.is_cache + = true; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power + = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = + 1; interface_ip.num_rw_ports = 1;//lower level cache usually has one port. interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -290,21 +297,27 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* //output_data_csv(directory.caches.local_result); ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + //miss buffer Each MSHR contains enough state to handle one or more accesses + of any type to a single memory line. + //Due to the generality of the MSHR mechanism, the amount of state involved is + non-trivial, + //including the address, pointers to the cache entry and destination register, + written data, and various other pieces of state. 
tag + = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = + (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + + directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time + interface_ip.line_sz = + int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time + interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; @@ -321,9 +334,9 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS; data + = directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; @@ -331,13 +344,11 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip.nbanks = 1; interface_ip.out_w = 
interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -349,23 +360,23 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution. + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries + to decide wthether to merge. + data = + directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution. 
interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -377,23 +388,20 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS; data + = directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 
XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -406,27 +414,26 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* if (XML->sys.number_of_dir_levels ==2 && XML->sys.first_level_dir==0) { //first level directory - size = XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128; - line = int(ceil(XML->sys.domain_size/8.0)); - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; + size = + XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128; line = + int(ceil(XML->sys.domain_size/8.0)); assoc = + XML->sys.L2directory.L2Dir_config[2]; banks = + XML->sys.L2directory.L2Dir_config[3]; tag + = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little + bit over estimate interface_ip.specific_tag = 1; interface_ip.tag_w = + tag; interface_ip.cache_sz = 
XML->sys.L2directory.L2Dir_config[0]; interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. + interface_ip.access_mode = + 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; + interface_ip.throughput = + XML->sys.L2directory.L2Dir_config[4]/clockRate; interface_ip.latency = + XML->sys.L2directory.L2Dir_config[5]/clockRate; interface_ip.is_cache + = true; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power + = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = + 1; interface_ip.num_rw_ports = 1;//lower level cache usually has one port. interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -438,21 +445,27 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* //output_data_csv(directory.caches.local_result); ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + //miss buffer Each MSHR contains enough state to handle one or more accesses + of any type to a single memory line. 
+ //Due to the generality of the MSHR mechanism, the amount of state involved is + non-trivial, + //including the address, pointers to the cache entry and destination register, + written data, and various other pieces of state. tag + = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = + (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + + directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time + interface_ip.line_sz = + int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time + interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; @@ -469,9 +482,9 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS; data + = directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = 
data;//int(pow(2.0,ceil(log2(data)))); interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; @@ -479,13 +492,11 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip.nbanks = 1; interface_ip.out_w = interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -497,23 +508,22 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory1.caches.l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries + to decide wthether to merge. + data = + directory1.caches.l_ip.line_sz;//separate queue to prevent from cache + polution. 
interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -525,23 +535,20 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS; data + = directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 
XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; + interface_ip.cache_sz = + XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; interface_ip.assoc + = 0; interface_ip.nbanks = 1; interface_ip.out_w = + interface_ip.line_sz*8; interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -553,338 +560,449 @@ SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* if (XML->sys.first_level_dir==1)//IC { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = int(ceil(XML->sys.domain_size/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1024; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - 
strcpy(inv_dir.caches.name,"inv_dir"); - inv_dir.caches.init_cache(&interface_ip); - inv_dir.caches.optimize_array(); - inv_dir.area = inv_dir.caches.local_result.area; + tag = + XML->sys.physical_address_width + EXTRA_TAG_BITS; data + = int(ceil(XML->sys.domain_size/8.0)); interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; + interface_ip.cache_sz = + XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1]; + interface_ip.assoc = 0; + interface_ip.nbanks = 1024; + interface_ip.out_w = interface_ip.line_sz*8; + interface_ip.access_mode = 0; + interface_ip.throughput = + XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = + XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy + = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = + 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + strcpy(inv_dir.caches.name,"inv_dir"); + inv_dir.caches.init_cache(&interface_ip); + inv_dir.caches.optimize_array(); + inv_dir.area = inv_dir.caches.local_result.area; } */ -// //pipeline -// interface_ip.pipeline_stages = int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = directory.caches.l_ip.out_w + directory.caches.l_ip.tag_w ; -// pipeLogicDirectory.init_pipeline(is_default, &interface_ip); -// pipeLogicDirectory.compute_pipeline(); -// -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + pipeLogicDirectory.tot_stage_vector; -// clockNetwork.optimize_wire(); - + // //pipeline + // interface_ip.pipeline_stages = + // int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time)); + // interface_ip.per_stage_vector = directory.caches.l_ip.out_w + + // directory.caches.l_ip.tag_w ; pipeLogicDirectory.init_pipeline(is_default, + // &interface_ip); pipeLogicDirectory.compute_pipeline(); + // + // //clock power + // clockNetwork.init_wire_external(is_default, &interface_ip); + // clockNetwork.clk_area =area*1.1;//10% of placement overhead. + // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal + // clockNetwork.start_wiring_level =5;//toplevel metal + // clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + + // pipeLogicDirectory.tot_stage_vector; clockNetwork.optimize_wire(); } +void SharedCache::computeEnergy(bool is_tdp) { + double homenode_data_access = (cachep.dir_ty == SBT) ? 
0.9 : 1.0; + if (is_tdp) { + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + // init stats for Peak + unicache.caches->stats_t.readAc.access = + .67 * unicache.caches->l_ip.num_rw_ports * cachep.duty_cycle * + homenode_data_access; + unicache.caches->stats_t.readAc.miss = 0; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access = + .33 * unicache.caches->l_ip.num_rw_ports * cachep.duty_cycle * + homenode_data_access; + unicache.caches->stats_t.writeAc.miss = 0; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->tdp_stats = unicache.caches->stats_t; + + if (cachep.dir_ty == SBT) { + homenode_stats_t.readAc.access = + .67 * unicache.caches->l_ip.num_rw_ports * cachep.dir_duty_cycle * + (1 - homenode_data_access); + homenode_stats_t.readAc.miss = 0; + homenode_stats_t.readAc.hit = + homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; + homenode_stats_t.writeAc.access = + .67 * unicache.caches->l_ip.num_rw_ports * cachep.dir_duty_cycle * + (1 - homenode_data_access); + homenode_stats_t.writeAc.miss = 0; + homenode_stats_t.writeAc.hit = + homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss; + homenode_tdp_stats = homenode_stats_t; + } + + unicache.missb->stats_t.readAc.access = + unicache.missb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.missb->stats_t.writeAc.access = + unicache.missb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.missb->tdp_stats = unicache.missb->stats_t; + + unicache.ifb->stats_t.readAc.access = + unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.ifb->stats_t.writeAc.access = + unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.ifb->tdp_stats = unicache.ifb->stats_t; + + 
unicache.prefetchb->stats_t.readAc.access = + unicache.prefetchb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.prefetchb->stats_t.writeAc.access = + unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t; + + unicache.wbb->stats_t.readAc.access = + unicache.wbb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.wbb->stats_t.writeAc.access = + unicache.wbb->l_ip.num_search_ports * cachep.duty_cycle; + unicache.wbb->tdp_stats = unicache.wbb->stats_t; + } else { + unicache.caches->stats_t.readAc.access = + unicache.caches->l_ip.num_search_ports * cachep.duty_cycle; + unicache.caches->stats_t.readAc.miss = 0; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access = 0; + unicache.caches->stats_t.writeAc.miss = 0; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->tdp_stats = unicache.caches->stats_t; + } + + } else { + // init stats for runtime power (RTP) + if (cacheL == L2) { + unicache.caches->stats_t.readAc.access = + XML->sys.L2[ithCache].read_accesses; + unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access = + XML->sys.L2[ithCache].write_accesses; + unicache.caches->stats_t.writeAc.miss = + XML->sys.L2[ithCache].write_misses; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->rtp_stats = unicache.caches->stats_t; + + if (cachep.dir_ty == SBT) { + homenode_rtp_stats.readAc.access = + XML->sys.L2[ithCache].homenode_read_accesses; + homenode_rtp_stats.readAc.miss = + XML->sys.L2[ithCache].homenode_read_misses; + 
homenode_rtp_stats.readAc.hit = + homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; + homenode_rtp_stats.writeAc.access = + XML->sys.L2[ithCache].homenode_write_accesses; + homenode_rtp_stats.writeAc.miss = + XML->sys.L2[ithCache].homenode_write_misses; + homenode_rtp_stats.writeAc.hit = + homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; + } + } else if (cacheL == L3) { + unicache.caches->stats_t.readAc.access = + XML->sys.L3[ithCache].read_accesses; + unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access = + XML->sys.L3[ithCache].write_accesses; + unicache.caches->stats_t.writeAc.miss = + XML->sys.L3[ithCache].write_misses; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->rtp_stats = unicache.caches->stats_t; + + if (cachep.dir_ty == SBT) { + homenode_rtp_stats.readAc.access = + XML->sys.L3[ithCache].homenode_read_accesses; + homenode_rtp_stats.readAc.miss = + XML->sys.L3[ithCache].homenode_read_misses; + homenode_rtp_stats.readAc.hit = + homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; + homenode_rtp_stats.writeAc.access = + XML->sys.L3[ithCache].homenode_write_accesses; + homenode_rtp_stats.writeAc.miss = + XML->sys.L3[ithCache].homenode_write_misses; + homenode_rtp_stats.writeAc.hit = + homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; + } + } else if (cacheL == L1Directory) { + unicache.caches->stats_t.readAc.access = + XML->sys.L1Directory[ithCache].read_accesses; + unicache.caches->stats_t.readAc.miss = + XML->sys.L1Directory[ithCache].read_misses; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access 
= + XML->sys.L1Directory[ithCache].write_accesses; + unicache.caches->stats_t.writeAc.miss = + XML->sys.L1Directory[ithCache].write_misses; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->rtp_stats = unicache.caches->stats_t; + } else if (cacheL == L2Directory) { + unicache.caches->stats_t.readAc.access = + XML->sys.L2Directory[ithCache].read_accesses; + unicache.caches->stats_t.readAc.miss = + XML->sys.L2Directory[ithCache].read_misses; + unicache.caches->stats_t.readAc.hit = + unicache.caches->stats_t.readAc.access - + unicache.caches->stats_t.readAc.miss; + unicache.caches->stats_t.writeAc.access = + XML->sys.L2Directory[ithCache].write_accesses; + unicache.caches->stats_t.writeAc.miss = + XML->sys.L2Directory[ithCache].write_misses; + unicache.caches->stats_t.writeAc.hit = + unicache.caches->stats_t.writeAc.access - + unicache.caches->stats_t.writeAc.miss; + unicache.caches->rtp_stats = unicache.caches->stats_t; + } + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && + cacheL == + L2Directory))) { // Assuming write back and write-allocate cache + + unicache.missb->stats_t.readAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.missb->stats_t.writeAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.missb->rtp_stats = unicache.missb->stats_t; + + unicache.ifb->stats_t.readAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.ifb->stats_t.writeAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.ifb->rtp_stats = unicache.ifb->stats_t; + + unicache.prefetchb->stats_t.readAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.prefetchb->stats_t.writeAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; + + unicache.wbb->stats_t.readAc.access = + unicache.caches->stats_t.writeAc.miss; + unicache.wbb->stats_t.writeAc.access = + 
unicache.caches->stats_t.writeAc.miss; + if (cachep.dir_ty == SBT) { + unicache.missb->stats_t.readAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.missb->stats_t.writeAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.missb->rtp_stats = unicache.missb->stats_t; + + unicache.missb->stats_t.readAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.missb->stats_t.writeAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.missb->rtp_stats = unicache.missb->stats_t; + + unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.ifb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; + unicache.ifb->rtp_stats = unicache.ifb->stats_t; + + unicache.prefetchb->stats_t.readAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.prefetchb->stats_t.writeAc.access += + homenode_rtp_stats.writeAc.miss; + unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; + + unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; + } + unicache.wbb->rtp_stats = unicache.wbb->stats_t; + } + } -void SharedCache::computeEnergy(bool is_tdp) -{ - double homenode_data_access = (cachep.dir_ty==SBT)? 
0.9:1.0; - if (is_tdp) - { - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - //init stats for Peak - unicache.caches->stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = .33*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.readAc.miss = 0; - homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; - homenode_stats_t.writeAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.writeAc.miss = 0; - homenode_stats_t.writeAc.hit = homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss; - homenode_tdp_stats = homenode_stats_t; - } - - unicache.missb->stats_t.readAc.access = unicache.missb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.missb->stats_t.writeAc.access = unicache.missb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.missb->tdp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access = unicache.ifb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.ifb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.ifb->tdp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.prefetchb->l_ip.num_search_ports*cachep.duty_cycle; - 
unicache.prefetchb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.wbb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.wbb->stats_t.writeAc.access = unicache.wbb->l_ip.num_search_ports*cachep.duty_cycle; - unicache.wbb->tdp_stats = unicache.wbb->stats_t; - } - else - { - unicache.caches->stats_t.readAc.access = unicache.caches->l_ip.num_search_ports*cachep.duty_cycle; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = 0; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - } - - } - else - { - //init stats for runtime power (RTP) - if (cacheL==L2) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L2[ithCache].homenode_read_accesses; - homenode_rtp_stats.readAc.miss = XML->sys.L2[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = 
XML->sys.L2[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L2[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L3) - { - unicache.caches->stats_t.readAc.access = XML->sys.L3[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L3[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L3[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L3[ithCache].homenode_read_accesses; - homenode_rtp_stats.readAc.miss = XML->sys.L3[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = XML->sys.L3[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L3[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L1Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L1Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L1Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L1Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L1Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit 
= unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - else if (cacheL==L2Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { //Assuming write back and write-allocate cache - - unicache.missb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss ; - unicache.missb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.wbb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - if (cachep.dir_ty==SBT) - { - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - 
unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - } - unicache.wbb->rtp_stats = unicache.wbb->stats_t; - - } - - } - - unicache.power_t.reset(); - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.hit*unicache.caches->local_result.power.readOp.dynamic+ - unicache.caches->stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - unicache.caches->stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.writeOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic);//write miss will also generate a write later - - if (cachep.dir_ty==SBT) - { - unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * (unicache.caches->local_result.data_array2->power.readOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic) + - homenode_stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - homenode_stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - 
homenode_stats_t.writeAc.hit*(unicache.caches->local_result.data_array2->power.writeOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - homenode_stats_t.writeAc.miss*unicache.caches->local_result.power.writeOp.dynamic);//write miss on dynamic home node will generate a replacement write on whole cache block - - - } - - unicache.power_t.readOp.dynamic += unicache.missb->stats_t.readAc.access*unicache.missb->local_result.power.searchOp.dynamic + - unicache.missb->stats_t.writeAc.access*unicache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - unicache.power_t.readOp.dynamic += unicache.ifb->stats_t.readAc.access*unicache.ifb->local_result.power.searchOp.dynamic + - unicache.ifb->stats_t.writeAc.access*unicache.ifb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.prefetchb->stats_t.readAc.access*unicache.prefetchb->local_result.power.searchOp.dynamic + - unicache.prefetchb->stats_t.writeAc.access*unicache.prefetchb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.wbb->stats_t.readAc.access*unicache.wbb->local_result.power.searchOp.dynamic + - unicache.wbb->stats_t.writeAc.access*unicache.wbb->local_result.power.writeOp.dynamic; - } - else - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.access*unicache.caches->local_result.power.searchOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic); - } + unicache.power_t.reset(); + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + unicache.power_t.readOp.dynamic += + (unicache.caches->stats_t.readAc.hit * + unicache.caches->local_result.power.readOp.dynamic + + unicache.caches->stats_t.readAc.miss * + unicache.caches->local_result.tag_array2->power.readOp.dynamic + + unicache.caches->stats_t.writeAc.miss * + 
unicache.caches->local_result.tag_array2->power.writeOp.dynamic + + unicache.caches->stats_t.writeAc.access * + unicache.caches->local_result.power.writeOp + .dynamic); // write miss will also generate a write later + + if (cachep.dir_ty == SBT) { + unicache.power_t.readOp.dynamic += + homenode_stats_t.readAc.hit * + (unicache.caches->local_result.data_array2->power.readOp.dynamic * + dir_overhead + + unicache.caches->local_result.tag_array2->power.readOp.dynamic) + + homenode_stats_t.readAc.miss * + unicache.caches->local_result.tag_array2->power.readOp.dynamic + + homenode_stats_t.writeAc.miss * + unicache.caches->local_result.tag_array2->power.readOp.dynamic + + homenode_stats_t.writeAc.hit * + (unicache.caches->local_result.data_array2->power.writeOp + .dynamic * + dir_overhead + + unicache.caches->local_result.tag_array2->power.readOp.dynamic + + homenode_stats_t.writeAc.miss * + unicache.caches->local_result.power.writeOp + .dynamic); // write miss on dynamic home node will + // generate a replacement write on whole cache + // block + } + + unicache.power_t.readOp.dynamic += + unicache.missb->stats_t.readAc.access * + unicache.missb->local_result.power.searchOp.dynamic + + unicache.missb->stats_t.writeAc.access * + unicache.missb->local_result.power.writeOp + .dynamic; // each access to missb involves a CAM and a write + unicache.power_t.readOp.dynamic += + unicache.ifb->stats_t.readAc.access * + unicache.ifb->local_result.power.searchOp.dynamic + + unicache.ifb->stats_t.writeAc.access * + unicache.ifb->local_result.power.writeOp.dynamic; + unicache.power_t.readOp.dynamic += + unicache.prefetchb->stats_t.readAc.access * + unicache.prefetchb->local_result.power.searchOp.dynamic + + unicache.prefetchb->stats_t.writeAc.access * + unicache.prefetchb->local_result.power.writeOp.dynamic; + unicache.power_t.readOp.dynamic += + unicache.wbb->stats_t.readAc.access * + unicache.wbb->local_result.power.searchOp.dynamic + + unicache.wbb->stats_t.writeAc.access * + 
unicache.wbb->local_result.power.writeOp.dynamic; + } else { + unicache.power_t.readOp.dynamic += + (unicache.caches->stats_t.readAc.access * + unicache.caches->local_result.power.searchOp.dynamic + + unicache.caches->stats_t.writeAc.access * + unicache.caches->local_result.power.writeOp.dynamic); + } - if (is_tdp) - { - unicache.power = unicache.power_t + (unicache.caches->local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power = unicache.power+ - (unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - power = power + unicache.power; -// cout<<"unicache.caches->local_result.power.readOp.dynamic"<local_result.power.readOp.dynamic<local_result.power.writeOp.dynamic"<local_result.power.writeOp.dynamic<local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.rt_power = unicache.rt_power + - (unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - rt_power = rt_power + unicache.rt_power; - } + if (is_tdp) { + unicache.power = + unicache.power_t + (unicache.caches->local_result.power) * pppm_lkg; + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + unicache.power = + unicache.power + (unicache.missb->local_result.power + + unicache.ifb->local_result.power + + unicache.prefetchb->local_result.power + + unicache.wbb->local_result.power) * + pppm_lkg; + } + power = power + unicache.power; + // cout<<"unicache.caches->local_result.power.readOp.dynamic"<local_result.power.readOp.dynamic<local_result.power.writeOp.dynamic"<local_result.power.writeOp.dynamic<local_result.power) * pppm_lkg; + if (!((cachep.dir_ty == ST && 
cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + unicache.rt_power = + unicache.rt_power + (unicache.missb->local_result.power + + unicache.ifb->local_result.power + + unicache.prefetchb->local_result.power + + unicache.wbb->local_result.power) * + pppm_lkg; + } + rt_power = rt_power + unicache.rt_power; + } } -void SharedCache::displayEnergy(uint32_t indent,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) - { - cout << (XML->sys.Private_L2? indent_str:"")<< cachep.name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*cachep.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - if (power_gating) cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel? power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/cachep.executionTime << " W" << endl; - cout <sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << (XML->sys.Private_L2 ? indent_str : "") << cachep.name << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * cachep.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? 
power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str << "Runtime Dynamic = " + << rt_power.readOp.dynamic / cachep.executionTime << " W" << endl; + cout << endl; + } else { + } } -//void SharedCache::computeMaxPower() +// void SharedCache::computeMaxPower() //{ // //Compute maximum power and runtime power. -// //When computing runtime power, McPAT gets or reasons out the statistics based on XML input. -// maxPower = 0.0; +// //When computing runtime power, McPAT gets or reasons out the statistics +// based on XML input. maxPower = 0.0; // //llCache,itlb // llCache.maxPower = 0.0; -// llCache.maxPower += (llCache.caches.l_ip.num_rw_ports*(0.67*llCache.caches.local_result.power.readOp.dynamic+0.33*llCache.caches.local_result.power.writeOp.dynamic) +// llCache.maxPower += +// (llCache.caches.l_ip.num_rw_ports*(0.67*llCache.caches.local_result.power.readOp.dynamic+0.33*llCache.caches.local_result.power.writeOp.dynamic) // +llCache.caches.l_ip.num_rd_ports*llCache.caches.local_result.power.readOp.dynamic+llCache.caches.l_ip.num_wr_ports*llCache.caches.local_result.power.writeOp.dynamic // +llCache.caches.l_ip.num_se_rd_ports*llCache.caches.local_result.power.readOp.dynamic)*clockRate; // ///cout<<"llCache.maxPower=" <sys.first_level_dir==1) // { -// inv_dir.maxPower = inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size; -// cc.power.readOp.dynamic = inv_dir.maxPower*scktRatio*64/XML->sys.domain_size; -// cc.power.readOp.leakage = inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*64/XML->sys.domain_size; +// inv_dir.maxPower = +// 
inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size; +// cc.power.readOp.dynamic = +// inv_dir.maxPower*scktRatio*64/XML->sys.domain_size; +// cc.power.readOp.leakage = +// inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*64/XML->sys.domain_size; // // cc.area.set_area(inv_dir.area*64/XML->sys.domain_size); // cout<<"CC area="<sys.L2[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - cachep.capacity = XML->sys.L2[ithCache].L2_config[0]; - cachep.blockW = XML->sys.L2[ithCache].L2_config[1]; - cachep.assoc = XML->sys.L2[ithCache].L2_config[2]; - cachep.nbanks = XML->sys.L2[ithCache].L2_config[3]; - cachep.throughput = XML->sys.L2[ithCache].L2_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2[ithCache].L2_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) - { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - - if (XML->sys.Private_L2 && XML->sys.core[ithCache].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.core[ithCache].vdd; - interface_ip.lop_Vdd = 
XML->sys.core[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.core[ithCache].vdd; - } - - if (XML->sys.Private_L2 && XML->sys.core[ithCache].power_gating_vcc >-1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.core[ithCache].power_gating_vcc; - } - if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L2[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L2[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L2[ithCache].vdd; - } - if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].power_gating_vcc >-1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.L2[ithCache].power_gating_vcc; - } - } - else if (cacheL==L3) - { - cachep.name = "L3"; - cachep.clockRate = XML->sys.L3[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - cachep.capacity = XML->sys.L3[ithCache].L3_config[0]; - cachep.blockW = XML->sys.L3[ithCache].L3_config[1]; - cachep.assoc = XML->sys.L3[ithCache].L3_config[2]; - cachep.nbanks = XML->sys.L3[ithCache].L3_config[3]; - cachep.throughput = XML->sys.L3[ithCache].L3_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L3[ithCache].L3_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L3[ithCache].buffer_sizes[2]; - cachep.wbb_size = 
XML->sys.L3[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) - { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - if ( XML->sys.L3[ithCache].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L3[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L3[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L3[ithCache].vdd; - } - - if (XML->sys.L3[ithCache].power_gating_vcc >-1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.L3[ithCache].power_gating_vcc; - } - } - else if (cacheL==L1Directory) - { - cachep.name = "First Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L1Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L1Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L1Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; - 
cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L1Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; - - if ( XML->sys.L1Directory[ithCache].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L1Directory[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L1Directory[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L1Directory[ithCache].vdd; - } - - if (XML->sys.L1Directory[ithCache].power_gating_vcc >-1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.L1Directory[ithCache].power_gating_vcc; - } - } - else if (cacheL==L2Directory) - { - cachep.name = "Second Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L2Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L2Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L2Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = 
XML->sys.L2Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; - - if ( XML->sys.L2Directory[ithCache].vdd>0) - { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L2Directory[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L2Directory[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L2Directory[ithCache].vdd; - } - - if (XML->sys.L2Directory[ithCache].power_gating_vcc >-1) - { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.L2Directory[ithCache].power_gating_vcc; - } - } - //cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35; - - +void SharedCache::set_cache_param() { + if (cacheL == L2) { + cachep.name = "L2"; + cachep.clockRate = XML->sys.L2[ithCache].clockrate; + cachep.clockRate *= 1e6; + cachep.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L2[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L2[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L2[ithCache].device_type; + cachep.capacity = XML->sys.L2[ithCache].L2_config[0]; + cachep.blockW = XML->sys.L2[ithCache].L2_config[1]; + cachep.assoc = XML->sys.L2[ithCache].L2_config[2]; + cachep.nbanks = XML->sys.L2[ithCache].L2_config[3]; + cachep.throughput = XML->sys.L2[ithCache].L2_config[4] / cachep.clockRate; + cachep.latency = XML->sys.L2[ithCache].L2_config[5] / cachep.clockRate; + cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; + cachep.fu_size 
= XML->sys.L2[ithCache].buffer_sizes[1]; + cachep.prefetchb_size = XML->sys.L2[ithCache].buffer_sizes[2]; + cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; + cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle; + if (!XML->sys.L2[ithCache].merged_dir) { + cachep.dir_ty = NonDir; + } else { + cachep.dir_ty = SBT; + cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; + } + + if (XML->sys.Private_L2 && XML->sys.core[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.core[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.core[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.core[ithCache].vdd; + } + + if (XML->sys.Private_L2 && XML->sys.core[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.core[ithCache].power_gating_vcc; + } + if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L2[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L2[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L2[ithCache].vdd; + } + if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L2[ithCache].power_gating_vcc; + } + } else if (cacheL == L3) { + cachep.name = "L3"; + cachep.clockRate = XML->sys.L3[ithCache].clockrate; + cachep.clockRate *= 1e6; + cachep.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L3[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L3[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; + 
interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L3[ithCache].device_type; + cachep.capacity = XML->sys.L3[ithCache].L3_config[0]; + cachep.blockW = XML->sys.L3[ithCache].L3_config[1]; + cachep.assoc = XML->sys.L3[ithCache].L3_config[2]; + cachep.nbanks = XML->sys.L3[ithCache].L3_config[3]; + cachep.throughput = XML->sys.L3[ithCache].L3_config[4] / cachep.clockRate; + cachep.latency = XML->sys.L3[ithCache].L3_config[5] / cachep.clockRate; + cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; + cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; + cachep.prefetchb_size = XML->sys.L3[ithCache].buffer_sizes[2]; + cachep.wbb_size = XML->sys.L3[ithCache].buffer_sizes[3]; + cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle; + if (!XML->sys.L2[ithCache].merged_dir) { + cachep.dir_ty = NonDir; + } else { + cachep.dir_ty = SBT; + cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; + } + if (XML->sys.L3[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L3[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L3[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L3[ithCache].vdd; + } + + if (XML->sys.L3[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L3[ithCache].power_gating_vcc; + } + } else if (cacheL == L1Directory) { + cachep.name = "First Level Directory"; + cachep.dir_ty = + (enum Dir_type)XML->sys.L1Directory[ithCache].Directory_type; + cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate; + cachep.clockRate *= 1e6; + cachep.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L1Directory[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L1Directory[ithCache].device_type; + 
interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L1Directory[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L1Directory[ithCache].device_type; + cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; + cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; + cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; + cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; + cachep.throughput = + XML->sys.L1Directory[ithCache].Dir_config[4] / cachep.clockRate; + cachep.latency = + XML->sys.L1Directory[ithCache].Dir_config[5] / cachep.clockRate; + cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; + cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; + cachep.prefetchb_size = XML->sys.L1Directory[ithCache].buffer_sizes[2]; + cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; + cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; + + if (XML->sys.L1Directory[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L1Directory[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L1Directory[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L1Directory[ithCache].vdd; + } + + if (XML->sys.L1Directory[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L1Directory[ithCache].power_gating_vcc; + } + } else if (cacheL == L2Directory) { + cachep.name = "Second Level Directory"; + cachep.dir_ty = + (enum Dir_type)XML->sys.L2Directory[ithCache].Directory_type; + cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate; + cachep.clockRate *= 1e6; + cachep.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L2Directory[ithCache].device_type; // long channel device LSTP + 
interface_ip.data_arr_peri_global_tech_type = + XML->sys.L2Directory[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L2Directory[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L2Directory[ithCache].device_type; + cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; + cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; + cachep.assoc = XML->sys.L2Directory[ithCache].Dir_config[2]; + cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; + cachep.throughput = + XML->sys.L2Directory[ithCache].Dir_config[4] / cachep.clockRate; + cachep.latency = + XML->sys.L2Directory[ithCache].Dir_config[5] / cachep.clockRate; + cachep.missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0]; + cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; + cachep.prefetchb_size = XML->sys.L2Directory[ithCache].buffer_sizes[2]; + cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; + cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; + + if (XML->sys.L2Directory[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L2Directory[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L2Directory[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L2Directory[ithCache].vdd; + } + + if (XML->sys.L2Directory[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L2Directory[ithCache].power_gating_vcc; + } + } + // cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35; } - diff --git a/sharedcache.h b/sharedcache.h index 198d39a..b288326 100644 --- a/sharedcache.h +++ b/sharedcache.h @@ -33,56 +33,58 @@ #define SHAREDCACHE_H_ #include "XML_Parse.h" #include "area.h" -#include "parameter.h" #include "array.h" +#include "basic_components.h" #include "logic.h" +#include "parameter.h" + #include 
-#include "basic_components.h" -class SharedCache :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - enum cache_level cacheL; - DataCache unicache;//Shared cache - CacheDynParam cachep; - statsDef homenode_tdp_stats; - statsDef homenode_rtp_stats; - statsDef homenode_stats_t; - double dir_overhead; - // cache_processor llCache,directory, directory1, inv_dir; +class SharedCache : public Component { +public: + ParseXML *XML; + int ithCache; + InputParameter interface_ip; + enum cache_level cacheL; + DataCache unicache; // Shared cache + CacheDynParam cachep; + statsDef homenode_tdp_stats; + statsDef homenode_rtp_stats; + statsDef homenode_stats_t; + double dir_overhead; + // cache_processor llCache,directory, directory1, inv_dir; - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, executionTime; - // Component L2Tot, cc, cc1, ccTot; + // pipeline pipeLogicCache, pipeLogicDirectory; + // clock_network clockNetwork; + double scktRatio, executionTime; + // Component L2Tot, cc, cc1, ccTot; - SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2); - void set_cache_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~SharedCache(){}; + SharedCache(ParseXML *XML_interface, int ithCache_, + InputParameter *interface_ip_, enum cache_level cacheL_ = L2); + void set_cache_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, bool is_tdp = true); + ~SharedCache(){}; }; -class CCdir :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - DataCache dc;//Shared cache - ArrayST * shadow_dir; -// cache_processor llCache,directory, directory1, inv_dir; +class CCdir : public Component { +public: + ParseXML *XML; + int ithCache; + InputParameter interface_ip; + DataCache dc; // Shared cache + ArrayST 
*shadow_dir; + // cache_processor llCache,directory, directory1, inv_dir; - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, clockRate, executionTime; - Component L2Tot, cc, cc1, ccTot; + // pipeline pipeLogicCache, pipeLogicDirectory; + // clock_network clockNetwork; + double scktRatio, clockRate, executionTime; + Component L2Tot, cc, cc1, ccTot; - CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~CCdir(); + CCdir(ParseXML *XML_interface, int ithCache_, InputParameter *interface_ip_); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, bool is_tdp = true); + ~CCdir(); }; #endif /* SHAREDCACHE_H_ */ diff --git a/util/format.sh b/util/format.sh new file mode 100755 index 0000000..ab3c419 --- /dev/null +++ b/util/format.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Format C Code: +find -name '*.cpp' -o -name '*.h' -o -name '*.hh' -o -name '*.c' -o -name '*.cc' | xargs clang-format -i --verbose + +# Format Python Code: +yapf -ir -vv . diff --git a/util/run-clang-tidy.py b/util/run-clang-tidy.py new file mode 100755 index 0000000..1eb1352 --- /dev/null +++ b/util/run-clang-tidy.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python +# +#===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# +# FIXME: Integrate with clang-tidy-diff.py + +""" +Parallel clang-tidy runner +========================== + +Runs clang-tidy over all files in a compilation database. Requires clang-tidy +and clang-apply-replacements in $PATH. + +Example invocations. 
+- Run clang-tidy on all files in the current working directory with a default + set of checks and show warnings in the cpp files and all project headers. + run-clang-tidy.py $PWD + +- Fix all header guards. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard + +- Fix all header guards included from clang-tidy and header guards + for clang-tidy headers. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ + -header-filter=extra/clang-tidy + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import glob +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = './' + while not os.path.isfile(os.path.join(result, path)): + if os.path.realpath(result) == '/': + print('Error: could not find compilation database.') + sys.exit(1) + result += '../' + return os.path.realpath(result) + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, + header_filter, extra_arg, extra_arg_before, quiet, + config): + """Gets a command line for clang-tidy.""" + start = [clang_tidy_binary] + if header_filter is not None: + start.append('-header-filter=' + header_filter) + if checks: + start.append('-checks=' + checks) + if tmpdir is not None: + start.append('-export-fixes') + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. 
+ (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + start.append(name) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + if quiet: + start.append('-quiet') + if config: + start.append('-config=' + config) + start.append(f) + return start + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey="Diagnostics" + merged=[] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = { 'MainSourceFile': '', mergekey: merged } + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def check_clang_apply_replacements_binary(args): + """Checks if invoking supplied clang-apply-replacements binary works.""" + try: + subprocess.check_call([args.clang_apply_replacements_binary, '--version']) + except: + print('Unable to run clang-apply-replacements. 
Is clang-apply-replacements ' + 'binary correctly specified?', file=sys.stderr) + traceback.print_exc() + sys.exit(1) + + +def apply_fixes(args, tmpdir): + """Calls clang-apply-fixes on a given directory.""" + invocation = [args.clang_apply_replacements_binary] + if args.format: + invocation.append('-format') + if args.style: + invocation.append('-style=' + args.style) + invocation.append(tmpdir) + subprocess.call(invocation) + + +def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): + """Takes filenames out of queue and runs clang-tidy on them.""" + while True: + name = queue.get() + invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, + tmpdir, build_path, args.header_filter, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config) + + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' + 'in a compilation database. 
Requires ' + 'clang-tidy and clang-apply-replacements in ' + '$PATH.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy', + help='path to clang-tidy binary') + parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', + default='clang-apply-replacements', + help='path to clang-apply-replacements binary') + parser.add_argument('-checks', default=None, + help='checks filter, when not specified, use clang-tidy ' + 'default') + parser.add_argument('-config', default=None, + help='Specifies a configuration in YAML/JSON format: ' + ' -config="{Checks: \'*\', ' + ' CheckOptions: [{key: x, ' + ' value: y}]}" ' + 'When the value is empty, clang-tidy will ' + 'attempt to find a file named .clang-tidy for ' + 'each source file in its parent directories.') + parser.add_argument('-header-filter', default=None, + help='regular expression matching the names of the ' + 'headers to output diagnostics from. Diagnostics from ' + 'the main file of each translation unit are always ' + 'displayed.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-j', type=int, default=0, + help='number of tidy instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-fix', action='store_true', help='apply fix-its') + parser.add_argument('-format', action='store_true', help='Reformat code ' + 'after applying fixes') + parser.add_argument('-style', default='file', help='The style of reformat ' + 'code after applying fixes') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command 
line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-tidy in quiet mode') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + try: + invocation = [args.clang_tidy_binary, '-list-checks'] + invocation.append('-p=' + build_path) + if args.checks: + invocation.append('-checks=' + args.checks) + invocation.append('-') + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. + with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-tidy.", file=sys.stderr) + sys.exit(1) + + # Load the database and extract all files. + database = json.load(open(os.path.join(build_path, db_path))) + files = [make_absolute(entry['file'], entry['directory']) + for entry in database] + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + tmpdir = None + if args.fix or (yaml and args.export_fixes): + check_clang_apply_replacements_binary(args) + tmpdir = tempfile.mkdtemp() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_tidy, + args=(args, tmpdir, build_path, task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. 
+ for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. + print('\nCtrl-C detected, goodbye.') + if tmpdir: + shutil.rmtree(tmpdir) + os.kill(0, 9) + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + print('Error exporting fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if args.fix: + print('Applying fixes ...') + try: + apply_fixes(args, tmpdir) + except: + print('Error applying fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if tmpdir: + shutil.rmtree(tmpdir) + sys.exit(return_code) + +if __name__ == '__main__': + main() diff --git a/version.h b/version.h index 2aa2eb5..ec8e616 100644 --- a/version.h +++ b/version.h @@ -32,9 +32,9 @@ #ifndef VERSION_H_ #define VERSION_H_ -#define VER_MAJOR 1 -#define VER_MINOR 3 +#define VER_MAJOR 1 +#define VER_MINOR 3 -#define VER_UPDATE "Feb, 2015" +#define VER_UPDATE "Feb, 2015" #endif /* VERSION_H_ */ diff --git a/xmlParser.cc b/xmlParser.cc index 968658a..beca68c 100644 --- a/xmlParser.cc +++ b/xmlParser.cc @@ -37,23 +37,23 @@ * * NOTE: * - * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file - * the "openFileHelper" function will always display error messages inside the - * console instead of inside a message-box-window. Message-box-windows are + * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this + *file the "openFileHelper" function will always display error messages inside + *the console instead of inside a message-box-window. Message-box-windows are * available on windows 9x/NT/2000/XP/Vista only. 
* - * The following license terms for the "XMLParser library from Business-Insight" apply to projects - * that are in some way related to - * the "mcpat project", including applications - * using "mcpat project" and tools developed - * for enhancing "mcpat project". All other projects - * (not related to "mcpat project") have to use the "XMLParser library from Business-Insight" - * code under the Aladdin Free Public License (AFPL) - * See the file "AFPL-license.txt" for more informations about the AFPL license. - * (see http://www.artifex.com/downloads/doc/Public.htm for detailed AFPL terms) + * The following license terms for the "XMLParser library from Business-Insight" + *apply to projects that are in some way related to the "mcpat project", + *including applications using "mcpat project" and tools developed for enhancing + *"mcpat project". All other projects (not related to "mcpat project") have to + *use the "XMLParser library from Business-Insight" code under the Aladdin Free + *Public License (AFPL) See the file "AFPL-license.txt" for more informations + *about the AFPL license. (see http://www.artifex.com/downloads/doc/Public.htm + *for detailed AFPL terms) * - * Redistribution and use of the "XMLParser library from Business-Insight" in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * Redistribution and use of the "XMLParser library from Business-Insight" in + *source and binary forms, with or without modification, are permitted provided + *that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright @@ -91,265 +91,364 @@ //#endif #define WIN32_LEAN_AND_MEAN #include // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files - // to have "MessageBoxA" to display error messages for openFilHelper +// to have "MessageBoxA" to display error messages for openFilHelper #endif -#include #include +#include #include -#include #include +#include XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); } -void freeXMLString(XMLSTR t){if(t)free(t);} +void freeXMLString(XMLSTR t) { + if (t) + free(t); +} -static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; -static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; +static XMLNode::XMLCharEncoding characterEncoding = XMLNode::char_encoding_UTF8; +static char guessWideCharChars = 1, dropWhiteSpace = 1, + removeCommentsInMiddleOfText = 1; -inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } +inline int mmin(const int t1, const int t2) { return t1 < t2 ? t1 : t2; } // You can modify the initialization of the variable "XMLClearTags" below // to change the clearTags that are currently recognized by the library. // The number on the second columns is the length of the string inside the // first column. The "") }, - { _CXML("") }, - { _CXML("") }, - { _CXML("
")    ,5,  _CXML("
") }, -// { _CXML("")}, - { NULL ,0, NULL } -}; +typedef struct { + XMLCSTR lpszOpen; + int openTagLen; + XMLCSTR lpszClose; +} ALLXMLClearTag; +static ALLXMLClearTag XMLClearTags[] = { + {_CXML("")}, + {_CXML("")}, + {_CXML("")}, + {_CXML("
"), 5, _CXML("
")}, + // { _CXML("")}, + {NULL, 0, NULL}}; // You can modify the initialization of the variable "XMLEntities" below -// to change the character entities that are currently recognized by the library. -// The number on the second columns is the length of the string inside the -// first column. Additionally, the syntaxes " " and " " are recognized. -typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; -static XMLCharacterEntity XMLEntities[] = -{ - { _CXML("&" ), 5, _CXML('&' )}, - { _CXML("<" ), 4, _CXML('<' )}, - { _CXML(">" ), 4, _CXML('>' )}, - { _CXML("""), 6, _CXML('\"')}, - { _CXML("'"), 6, _CXML('\'')}, - { NULL , 0, '\0' } -}; - -// When rendering the XMLNode to a string (using the "createXMLString" function), -// you can ask for a beautiful formatting. This formatting is using the -// following indentation character: +// to change the character entities that are currently recognized by the +// library. The number on the second columns is the length of the string inside +// the first column. Additionally, the syntaxes " " and " " are +// recognized. +typedef struct { + XMLCSTR s; + int l; + XMLCHAR c; +} XMLCharacterEntity; +static XMLCharacterEntity XMLEntities[] = { + {_CXML("&"), 5, _CXML('&')}, {_CXML("<"), 4, _CXML('<')}, + {_CXML(">"), 4, _CXML('>')}, {_CXML("""), 6, _CXML('\"')}, + {_CXML("'"), 6, _CXML('\'')}, {NULL, 0, '\0'}}; + +// When rendering the XMLNode to a string (using the "createXMLString" +// function), you can ask for a beautiful formatting. This formatting is using +// the following indentation character: #define INDENTCHAR _CXML('\t') // The following function parses the XML errors into a user friendly string. -// You can edit this to change the output language of the library to something else. 
-XMLCSTR XMLNode::getError(XMLError xerror) -{ - switch (xerror) - { - case eXMLErrorNone: return _CXML("No error"); - case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); - case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); - case eXMLErrorEmpty: return _CXML("Error: No XML data"); - case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); - case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); - case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); - case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); - case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); - case eXMLErrorNoElements: return _CXML("Error: No elements found"); - case eXMLErrorFileNotFound: return _CXML("Error: File not found"); - case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); - case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); - case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); - case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); - case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); - case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); - - case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); - case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); - case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); - case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); - }; - return _CXML("Unknown"); +// You can edit this to change the output language of the library to something +// 
else. +XMLCSTR XMLNode::getError(XMLError xerror) { + switch (xerror) { + case eXMLErrorNone: + return _CXML("No error"); + case eXMLErrorMissingEndTag: + return _CXML("Warning: Unmatched end tag"); + case eXMLErrorNoXMLTagFound: + return _CXML("Warning: No XML tag found"); + case eXMLErrorEmpty: + return _CXML("Error: No XML data"); + case eXMLErrorMissingTagName: + return _CXML("Error: Missing start tag name"); + case eXMLErrorMissingEndTagName: + return _CXML("Error: Missing end tag name"); + case eXMLErrorUnmatchedEndTag: + return _CXML("Error: Unmatched end tag"); + case eXMLErrorUnmatchedEndClearTag: + return _CXML("Error: Unmatched clear tag end"); + case eXMLErrorUnexpectedToken: + return _CXML("Error: Unexpected token found"); + case eXMLErrorNoElements: + return _CXML("Error: No elements found"); + case eXMLErrorFileNotFound: + return _CXML("Error: File not found"); + case eXMLErrorFirstTagNotFound: + return _CXML("Error: First Tag not found"); + case eXMLErrorUnknownCharacterEntity: + return _CXML("Error: Unknown character entity"); + case eXMLErrorCharacterCodeAbove255: + return _CXML("Error: Character code above 255 is forbidden in MultiByte " + "char mode."); + case eXMLErrorCharConversionError: + return _CXML( + "Error: unable to convert between WideChar and MultiByte chars"); + case eXMLErrorCannotOpenWriteFile: + return _CXML("Error: unable to open file for writing"); + case eXMLErrorCannotWriteFile: + return _CXML("Error: cannot write into file"); + + case eXMLErrorBase64DataSizeIsNotMultipleOf4: + return _CXML("Warning: Base64-string length is not a multiple of 4"); + case eXMLErrorBase64DecodeTruncatedData: + return _CXML("Warning: Base64-string is truncated"); + case eXMLErrorBase64DecodeIllegalCharacter: + return _CXML("Error: Base64-string contains an illegal character"); + case eXMLErrorBase64DecodeBufferTooSmall: + return _CXML("Error: Base64 decode output buffer is too small"); + }; + return _CXML("Unknown"); } 
///////////////////////////////////////////////////////////////////////// // Here start the abstraction layer to be OS-independent // ///////////////////////////////////////////////////////////////////////// -// Here is an abstraction layer to access some common string manipulation functions. -// The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0, -// Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++. -// If you plan to "port" the library to a new system/compiler, all you have to do is -// to edit the following lines. +// Here is an abstraction layer to access some common string manipulation +// functions. The abstraction layer is currently working for gcc, Microsoft +// Visual Studio 6.0, Microsoft Visual Studio .NET, CC (sun compiler) and +// Borland C++. If you plan to "port" the library to a new system/compiler, all +// you have to do is to edit the following lines. #ifdef XML_NO_WIDE_CHAR char myIsTextWideChar(const void *b, int len) { return FALSE; } #else - #if defined (UNDER_CE) || !defined(_XMLWINDOWS) - char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode - { +#if defined(UNDER_CE) || !defined(_XMLWINDOWS) +char myIsTextWideChar(const void *b, + int len) // inspired by the Wine API: RtlIsTextUnicode +{ #ifdef sun - // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. - if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; + // for SPARC processors: wchar_t* buffers must always be alligned, otherwise + // it's a char* buffer. 
+ if ((((unsigned long)b) % sizeof(wchar_t)) != 0) + return FALSE; #endif - const wchar_t *s=(const wchar_t*)b; + const wchar_t *s = (const wchar_t *)b; - // buffer too small: - if (len<(int)sizeof(wchar_t)) return FALSE; + // buffer too small: + if (len < (int)sizeof(wchar_t)) + return FALSE; - // odd length test - if (len&1) return FALSE; + // odd length test + if (len & 1) + return FALSE; - /* only checks the first 256 characters */ - len=mmin(256,len/sizeof(wchar_t)); + /* only checks the first 256 characters */ + len = mmin(256, len / sizeof(wchar_t)); - // Check for the special byte order: - if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; - if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE + // Check for the special byte order: + if (*((unsigned short *)s) == 0xFFFE) + return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; + if (*((unsigned short *)s) == 0xFEFF) + return TRUE; // IS_TEXT_UNICODE_SIGNATURE - // checks for ASCII characters in the UNICODE stream - int i,stats=0; - for (i=0; ilen/2) return TRUE; + // checks for ASCII characters in the UNICODE stream + int i, stats = 0; + for (i = 0; i < len; i++) + if (s[i] <= (unsigned short)255) + stats++; + if (stats > len / 2) + return TRUE; - // Check for UNICODE NULL chars - for (i=0; i - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } - #else - // for gcc - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } - #endif - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } - static inline XMLSTR 
xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) - { - char *filenameAscii=myWideCharToMultiByte(filename); - FILE *f; - if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); - else f=fopen(filenameAscii,"wb"); - free(filenameAscii); - return f; - } - #else - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return strlen(c); } - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } - #endif - static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} +#ifdef XML_NO_WIDE_CHAR +char *myWideCharToMultiByte(const wchar_t *s) { return NULL; } +#else +char *myWideCharToMultiByte(const wchar_t *s) { + const wchar_t *ss = s; + int i = (int)wcsrtombs(NULL, &ss, 0, NULL); + if (i < 0) + return NULL; + char *d = (char *)malloc(i + 1); + wcsrtombs(d, &s, i, NULL); + d[i] = 0; + return d; +} +#endif +#ifdef _XMLWIDECHAR +wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) { + const char *ss = s; + int i = (int)mbsrtowcs(NULL, &ss, 0, NULL); + if (i < 0) + return NULL; + wchar_t *d = (wchar_t *)malloc((i + 1) * sizeof(wchar_t)); + mbsrtowcs(d, &s, i, NULL); + d[i] = 0; + return d; +} +int xstrlen(XMLCSTR c) { return wcslen(c); } +#ifdef sun +// for CC +#include +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR 
c1, XMLCSTR c2) { return wscasecmp(c1, c2); } +#else +// for gcc +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return wcscasecmp(c1, c2); +} +#endif +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)wcsstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)wcscpy(c1, c2); +} +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + char *filenameAscii = myWideCharToMultiByte(filename); + FILE *f; + if (mode[0] == _CXML('r')) + f = fopen(filenameAscii, "rb"); + else + f = fopen(filenameAscii, "wb"); + free(filenameAscii); + return f; +} +#else +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + return fopen(filename, mode); +} +static inline int xstrlen(XMLCSTR c) { return strlen(c); } +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return strcasecmp(c1, c2); +} +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)strstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)strcpy(c1, c2); +} +#endif +static inline int _strnicmp(const char *c1, const char *c2, int l) { + return strncasecmp(c1, c2, l); +} #endif - /////////////////////////////////////////////////////////////////////////////// // the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions // @@ -358,82 +457,161 @@ char myIsTextWideChar(const void *b, int len) { return FALSE; } // There are only here as "convenience" functions for the user. // If you don't need them, you can delete them without any trouble. 
#ifdef _XMLWIDECHAR - #ifdef _XMLWINDOWS - // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 - char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #else - #ifdef sun - // for CC - #include - char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } - #else - // for gcc - char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } - #endif - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #endif +#ifdef _XMLWINDOWS +// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland +// C++ Builder 6.0 +char xmltob(XMLCSTR t, int v) { + if (t && (*t)) + return (char)_wtoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) + return _wtoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) + return _wtol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) + wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} +#else +#ifdef sun +// for CC +#include +char xmltob(XMLCSTR t, int v) { + if (t) + return (char)wstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) + return (int)wstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) + return wstol(t, NULL, 10); + return v; +} +#else +// for gcc +char xmltob(XMLCSTR t, int v) { + if (t) + return 
(char)wcstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) + return (int)wcstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) + return wcstol(t, NULL, 10); + return v; +} +#endif +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) + wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} +#endif #else - char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } +char xmltob(XMLCSTR t, char v) { + if (t && (*t)) + return (char)atoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) + return atoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) + return atol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) + return atof(t); + return v; +} #endif -XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; } -XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } - +XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v) { + if (t) + return t; + return v; +} +XMLCHAR xmltoc(XMLCSTR t, XMLCHAR v) { + if (t && (*t)) + return *t; + return v; +} + ///////////////////////////////////////////////////////////////////////// // the "openFileHelper" function // ///////////////////////////////////////////////////////////////////////// -// Since each application has its own way to report and deal with errors, you should modify & rewrite -// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. -XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) -{ - // guess the value of the global parameter "characterEncoding" - // (the guess is based on the first 200 bytes of the file). 
- FILE *f=xfopen(filename,_CXML("rb")); - if (f) - { - char bb[205]; - int l=(int)fread(bb,1,200,f); - setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); - fclose(f); +// Since each application has its own way to report and deal with errors, you +// should modify & rewrite the following "openFileHelper" function to get an +// "error reporting mechanism" tailored to your needs. +XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) { + // guess the value of the global parameter "characterEncoding" + // (the guess is based on the first 200 bytes of the file). + FILE *f = xfopen(filename, _CXML("rb")); + if (f) { + char bb[205]; + int l = (int)fread(bb, 1, 200, f); + setGlobalOptions(guessCharEncoding(bb, l), guessWideCharChars, + dropWhiteSpace, removeCommentsInMiddleOfText); + fclose(f); + } + + // parse the file + XMLResults pResults; + XMLNode xnode = XMLNode::parseFile(filename, tag, &pResults); + + // display error message (if any) + if (pResults.error != eXMLErrorNone) { + // create message + char message[2000], *s1 = (char *)"", *s3 = (char *)""; + XMLCSTR s2 = _CXML(""); + if (pResults.error == eXMLErrorFirstTagNotFound) { + s1 = (char *)"First Tag should be '"; + s2 = tag; + s3 = (char *)"'.\n"; } - - // parse the file - XMLResults pResults; - XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); - - // display error message (if any) - if (pResults.error != eXMLErrorNone) - { - // create message - char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); - if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } - sprintf(message, + sprintf(message, #ifdef _XMLWIDECHAR - "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" + "XML Parsing error inside file '%S'.\n%S\nAt line %i, column " + "%i.\n%s%S%s" #else - "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" + "XML Parsing error 
inside file '%s'.\n%s\nAt line %i, column " + "%i.\n%s%s%s" #endif - ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); - - // display message -#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) - MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); + , + filename, XMLNode::getError(pResults.error), pResults.nLine, + pResults.nColumn, s1, s2, s3); + + // display message +#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && \ + !defined(_XMLPARSER_NO_MESSAGEBOX_) + MessageBoxA(NULL, message, "XML Parsing error", + MB_OK | MB_ICONERROR | MB_TOPMOST); #else - printf("%s",message); + printf("%s", message); #endif - exit(255); - } - return xnode; + exit(255); + } + return xnode; } ///////////////////////////////////////////////////////////////////////// @@ -443,400 +621,456 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) // You should normally not change anything below this point. #ifndef _XMLWIDECHAR -// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte. -// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes). -// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes). -// This table is used as lookup-table to know the length of a character (in byte) based on the -// content of the first byte of the character. -// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ). -static const char XML_utf8ByteTable[256] = -{ +// If "characterEncoding=ascii" then we assume that all characters have the same +// length of 1 byte. If "characterEncoding=UTF8" then the characters have +// different lengths (from 1 byte to 4 bytes). If "characterEncoding=ShiftJIS" +// then the characters have different lengths (from 1 byte to 2 bytes). 
This +// table is used as lookup-table to know the length of a character (in byte) +// based on the content of the first byte of the character. (note: if you modify +// this, you must always have XML_utf8ByteTable[0]=0 ). +static const char XML_utf8ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte - 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid -}; -static const char XML_legacyByteTable[256] = -{ - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 + 1, 1, 1, 1, 1, 
1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 + 1, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte + 4, 4, 4, 4, 4, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid }; -static const char XML_sjisByteTable[256] = -{ +static const char XML_legacyByteTable[256] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const char XML_sjisByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0x80 0x81 to 0x9F 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 0xe0 to 0xef 2 bytes + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gb2312ByteTable[256] = -{ -// 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes 
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0 +static const char XML_gb2312ByteTable[256] = { + // 0 1 2 3 4 5 6 7 8 9 a b c d e f + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xa0 0xa1 to 0xf7 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gbk_big5_ByteTable[256] = -{ +static const char XML_gbk_big5_ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0x80 0x81 to 0xfe 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0x90 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xa0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 // 0xf0 }; -static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8" +static const char *XML_ByteTable = (const char *) + XML_utf8ByteTable; // the default is + // "characterEncoding=XMLNode::encoding_UTF8" #endif - XMLNode XMLNode::emptyXMLNode; -XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL}; -XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL}; +XMLClear XMLNode::emptyXMLClear = {NULL, NULL, NULL}; +XMLAttribute XMLNode::emptyXMLAttribute = {NULL, NULL}; // Enumeration used to decipher what type a token is -typedef enum XMLTokenTypeTag -{ - eTokenText = 0, - eTokenQuotedText, - eTokenTagStart, /* "<" */ - eTokenTagEnd, /* "" */ - eTokenEquals, /* "=" */ - eTokenDeclaration, /* "" */ - eTokenClear, - eTokenError +typedef enum XMLTokenTypeTag { + eTokenText = 0, + eTokenQuotedText, + eTokenTagStart, /* "<" */ + eTokenTagEnd, /* "" */ + eTokenEquals, /* "=" */ + eTokenDeclaration, /* "" 
*/ + eTokenClear, + eTokenError } XMLTokenType; // Main structure used for parsing XML -typedef struct XML -{ - XMLCSTR lpXML; - XMLCSTR lpszText; - int nIndex,nIndexMissigEndTag; - enum XMLError error; - XMLCSTR lpEndTag; - int cbEndTag; - XMLCSTR lpNewElement; - int cbNewElement; - int nFirst; +typedef struct XML { + XMLCSTR lpXML; + XMLCSTR lpszText; + int nIndex, nIndexMissigEndTag; + enum XMLError error; + XMLCSTR lpEndTag; + int cbEndTag; + XMLCSTR lpNewElement; + int cbNewElement; + int nFirst; } XML; -typedef struct -{ - ALLXMLClearTag *pClr; - XMLCSTR pStr; +typedef struct { + ALLXMLClearTag *pClr; + XMLCSTR pStr; } NextToken; // Enumeration used when parsing attributes -typedef enum Attrib -{ - eAttribName = 0, - eAttribEquals, - eAttribValue -} Attrib; +typedef enum Attrib { eAttribName = 0, eAttribEquals, eAttribValue } Attrib; // Enumeration used when parsing elements to dictate whether we are currently // inside a tag -typedef enum Status -{ - eInsideTag = 0, - eOutsideTag -} Status; +typedef enum Status { eInsideTag = 0, eOutsideTag } Status; -XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const -{ - if (!d) return eXMLErrorNone; - FILE *f=xfopen(filename,_CXML("wb")); - if (!f) return eXMLErrorCannotOpenWriteFile; +XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, + char nFormat) const { + if (!d) + return eXMLErrorNone; + FILE *f = xfopen(filename, _CXML("wb")); + if (!f) + return eXMLErrorCannotOpenWriteFile; #ifdef _XMLWIDECHAR - unsigned char h[2]={ 0xFF, 0xFE }; - if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile; - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (!fwrite(L"\n",sizeof(wchar_t)*40,1,f)) - return eXMLErrorCannotWriteFile; - } + unsigned char h[2] = {0xFF, 0xFE}; + if (!fwrite(h, 2, 1, f)) + return eXMLErrorCannotWriteFile; + if ((!isDeclaration()) && + ((d->lpszName) || (!getChildNode().isDeclaration()))) { + if (!fwrite(L"\n", + 
sizeof(wchar_t) * 40, 1, f)) + return eXMLErrorCannotWriteFile; + } #else - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (characterEncoding==char_encoding_UTF8) - { - // header so that windows recognize the file as UTF-8: - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; - encoding="utf-8"; - } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS"; - - if (!encoding) encoding="ISO-8859-1"; - if (fprintf(f,"\n",encoding)<0) return eXMLErrorCannotWriteFile; - } else - { - if (characterEncoding==char_encoding_UTF8) - { - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; - } + if ((!isDeclaration()) && + ((d->lpszName) || (!getChildNode().isDeclaration()))) { + if (characterEncoding == char_encoding_UTF8) { + // header so that windows recognize the file as UTF-8: + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) + return eXMLErrorCannotWriteFile; + encoding = "utf-8"; + } else if (characterEncoding == char_encoding_ShiftJIS) + encoding = "SHIFT-JIS"; + + if (!encoding) + encoding = "ISO-8859-1"; + if (fprintf(f, "\n", encoding) < 0) + return eXMLErrorCannotWriteFile; + } else { + if (characterEncoding == char_encoding_UTF8) { + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) + return eXMLErrorCannotWriteFile; } + } #endif - int i; - XMLSTR t=createXMLString(nFormat,&i); - if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile; - if (fclose(f)!=0) return eXMLErrorCannotWriteFile; - free(t); - return eXMLErrorNone; + int i; + XMLSTR t = createXMLString(nFormat, &i); + if (!fwrite(t, sizeof(XMLCHAR) * i, 1, f)) + return eXMLErrorCannotWriteFile; + if (fclose(f) != 0) + return eXMLErrorCannotWriteFile; + free(t); + return eXMLErrorNone; } // Duplicate a given string. 
-XMLSTR stringDup(XMLCSTR lpszData, int cbData) -{ - if (lpszData==NULL) return NULL; - - XMLSTR lpszNew; - if (cbData==-1) cbData=(int)xstrlen(lpszData); - lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); - if (lpszNew) - { - memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); - lpszNew[cbData] = (XMLCHAR)NULL; - } - return lpszNew; -} +XMLSTR stringDup(XMLCSTR lpszData, int cbData) { + if (lpszData == NULL) + return NULL; -XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) -{ - XMLSTR dd=dest; - XMLCHAR ch; - XMLCharacterEntity *entity; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } - entity++; - } while(entity->s); + XMLSTR lpszNew; + if (cbData == -1) + cbData = (int)xstrlen(lpszData); + lpszNew = (XMLSTR)malloc((cbData + 1) * sizeof(XMLCHAR)); + if (lpszNew) { + memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); + lpszNew[cbData] = (XMLCHAR)NULL; + } + return lpszNew; +} + +XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest, XMLCSTR source) { + XMLSTR dd = dest; + XMLCHAR ch; + XMLCharacterEntity *entity; + while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + xstrcpy(dest, entity->s); + dest += entity->l; + source++; + goto out_of_loop1; + } + entity++; + } while (entity->s); #ifdef _XMLWIDECHAR - *(dest++)=*(source++); + *(dest++) = *(source++); #else - switch(XML_ByteTable[(unsigned char)ch]) - { - case 4: *(dest++)=*(source++); - case 3: *(dest++)=*(source++); - case 2: *(dest++)=*(source++); - case 1: *(dest++)=*(source++); - } -#endif -out_of_loop1: - ; + switch (XML_ByteTable[(unsigned char)ch]) { + case 4: + *(dest++) = *(source++); + case 3: + *(dest++) = *(source++); + case 2: + *(dest++) = *(source++); + case 1: + *(dest++) = *(source++); } - *dest=0; - return dd; +#endif + out_of_loop1:; + } + *dest = 0; + return dd; } // private (used while rendering): -int 
ToXMLStringTool::lengthXMLString(XMLCSTR source) -{ - int r=0; - XMLCharacterEntity *entity; - XMLCHAR ch; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } - entity++; - } while(entity->s); +int ToXMLStringTool::lengthXMLString(XMLCSTR source) { + int r = 0; + XMLCharacterEntity *entity; + XMLCHAR ch; + while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + r += entity->l; + source++; + goto out_of_loop1; + } + entity++; + } while (entity->s); #ifdef _XMLWIDECHAR - r++; source++; + r++; + source++; #else - ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; + ch = XML_ByteTable[(unsigned char)ch]; + r += ch; + source += ch; #endif -out_of_loop1: - ; - } - return r; + out_of_loop1:; + } + return r; } -ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } -void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } -XMLSTR ToXMLStringTool::toXML(XMLCSTR source) -{ - int l=lengthXMLString(source)+1; - if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); } - return toXMLUnSafe(buf,source); +ToXMLStringTool::~ToXMLStringTool() { freeBuffer(); } +void ToXMLStringTool::freeBuffer() { + if (buf) + free(buf); + buf = NULL; + buflen = 0; +} +XMLSTR ToXMLStringTool::toXML(XMLCSTR source) { + int l = lengthXMLString(source) + 1; + if (l > buflen) { + buflen = l; + buf = (XMLSTR)realloc(buf, l * sizeof(XMLCHAR)); + } + return toXMLUnSafe(buf, source); } // private: -XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) -{ - // This function is the opposite of the function "toXMLString". It decodes the escape - // sequences &, ", ', <, > and replace them by the characters - // &,",',<,>. This function is used internally by the XML Parser. All the calls to - // the XML library will always gives you back "decoded" strings. 
- // - // in: string (s) and length (lo) of string - // out: new allocated string converted from xml - if (!s) return NULL; - - int ll=0,j; - XMLSTR d; - XMLCSTR ss=s; - XMLCharacterEntity *entity; - while ((lo>0)&&(*s)) - { - if (*s==_CXML('&')) - { - if ((lo>2)&&(s[1]==_CXML('#'))) - { - s+=2; lo-=2; - if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } - while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; - if (*s!=_CXML(';')) - { - pXML->error=eXMLErrorUnknownCharacterEntity; - return NULL; - } - s++; lo--; - } else - { - entity=XMLEntities; - do - { - if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } - entity++; - } while(entity->s); - if (!entity->s) - { - pXML->error=eXMLErrorUnknownCharacterEntity; - return NULL; - } - } - } else - { +XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) { + // This function is the opposite of the function "toXMLString". It decodes the + // escape sequences &, ", ', <, > and replace them by the + // characters + // &,",',<,>. This function is used internally by the XML Parser. All the + // calls to the XML library will always gives you back "decoded" strings. 
+ // + // in: string (s) and length (lo) of string + // out: new allocated string converted from xml + if (!s) + return NULL; + + int ll = 0, j; + XMLSTR d; + XMLCSTR ss = s; + XMLCharacterEntity *entity; + while ((lo > 0) && (*s)) { + if (*s == _CXML('&')) { + if ((lo > 2) && (s[1] == _CXML('#'))) { + s += 2; + lo -= 2; + if ((*s == _CXML('X')) || (*s == _CXML('x'))) { + s++; + lo--; + } + while ((*s) && (*s != _CXML(';')) && ((lo--) > 0)) + s++; + if (*s != _CXML(';')) { + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } + s++; + lo--; + } else { + entity = XMLEntities; + do { + if ((lo >= entity->l) && (xstrnicmp(s, entity->s, entity->l) == 0)) { + s += entity->l; + lo -= entity->l; + break; + } + entity++; + } while (entity->s); + if (!entity->s) { + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } + } + } else { #ifdef _XMLWIDECHAR - s++; lo--; + s++; + lo--; #else - j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; + j = XML_ByteTable[(unsigned char)*s]; + s += j; + lo -= j; + ll += j - 1; #endif - } - ll++; } - - d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); - s=d; - while (ll-->0) - { - if (*ss==_CXML('&')) - { - if (ss[1]==_CXML('#')) - { - ss+=2; j=0; - if ((*ss==_CXML('X'))||(*ss==_CXML('x'))) - { - ss++; - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); - else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; - else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} - ss++; - } - } else - { - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} - ss++; - } - } + ll++; + } + + d = (XMLSTR)malloc((ll + 1) * sizeof(XMLCHAR)); + s = d; + while (ll-- > 0) { + if (*ss == _CXML('&')) { + if (ss[1] == _CXML('#')) { + ss += 2; + j = 
0; + if ((*ss == _CXML('X')) || (*ss == _CXML('x'))) { + ss++; + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) + j = (j << 4) + *ss - _CXML('0'); + else if ((*ss >= _CXML('A')) && (*ss <= _CXML('F'))) + j = (j << 4) + *ss - _CXML('A') + 10; + else if ((*ss >= _CXML('a')) && (*ss <= _CXML('f'))) + j = (j << 4) + *ss - _CXML('a') + 10; + else { + free((void *)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } + ss++; + } + } else { + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) + j = (j * 10) + *ss - _CXML('0'); + else { + free((void *)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } + ss++; + } + } #ifndef _XMLWIDECHAR - if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} + if (j > 255) { + free((void *)s); + pXML->error = eXMLErrorCharacterCodeAbove255; + return NULL; + } #endif - (*d++)=(XMLCHAR)j; ss++; - } else - { - entity=XMLEntities; - do - { - if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } - entity++; - } while(entity->s); - } - } else - { + (*d++) = (XMLCHAR)j; + ss++; + } else { + entity = XMLEntities; + do { + if (xstrnicmp(ss, entity->s, entity->l) == 0) { + *(d++) = entity->c; + ss += entity->l; + break; + } + entity++; + } while (entity->s); + } + } else { #ifdef _XMLWIDECHAR - *(d++)=*(ss++); + *(d++) = *(ss++); #else - switch(XML_ByteTable[(unsigned char)*ss]) - { - case 4: *(d++)=*(ss++); ll--; - case 3: *(d++)=*(ss++); ll--; - case 2: *(d++)=*(ss++); ll--; - case 1: *(d++)=*(ss++); - } + switch (XML_ByteTable[(unsigned char)*ss]) { + case 4: + *(d++) = *(ss++); + ll--; + case 3: + *(d++) = *(ss++); + ll--; + case 2: + *(d++) = *(ss++); + ll--; + case 1: + *(d++) = *(ss++); + } #endif - } } - *d=0; - return (XMLSTR)s; + } + *d = 0; + return (XMLSTR)s; } -#define XML_isSPACECHAR(ch) ((ch==_CXML('\n'))||(ch==_CXML(' '))||(ch== _CXML('\t'))||(ch==_CXML('\r'))) +#define 
XML_isSPACECHAR(ch) \ + ((ch == _CXML('\n')) || (ch == _CXML(' ')) || (ch == _CXML('\t')) || \ + (ch == _CXML('\r'))) // private: char myTagCompare(XMLCSTR cclose, XMLCSTR copen) @@ -844,1233 +1078,1311 @@ char myTagCompare(XMLCSTR cclose, XMLCSTR copen) // return 0 if equals // return 1 if different { - if (!cclose) return 1; - int l=(int)xstrlen(cclose); - if (xstrnicmp(cclose, copen, l)!=0) return 1; - const XMLCHAR c=copen[l]; - if (XML_isSPACECHAR(c)|| - (c==_CXML('/' ))|| - (c==_CXML('<' ))|| - (c==_CXML('>' ))|| - (c==_CXML('=' ))) return 0; + if (!cclose) + return 1; + int l = (int)xstrlen(cclose); + if (xstrnicmp(cclose, copen, l) != 0) return 1; + const XMLCHAR c = copen[l]; + if (XML_isSPACECHAR(c) || (c == _CXML('/')) || (c == _CXML('<')) || + (c == _CXML('>')) || (c == _CXML('='))) + return 0; + return 1; } // Obtain the next character from the string. -static inline XMLCHAR getNextChar(XML *pXML) -{ - XMLCHAR ch = pXML->lpXML[pXML->nIndex]; +static inline XMLCHAR getNextChar(XML *pXML) { + XMLCHAR ch = pXML->lpXML[pXML->nIndex]; #ifdef _XMLWIDECHAR - if (ch!=0) pXML->nIndex++; + if (ch != 0) + pXML->nIndex++; #else - pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; + pXML->nIndex += XML_ByteTable[(unsigned char)ch]; #endif - return ch; + return ch; } // Find the next token in a string. // pcbToken contains the number of characters that have been read. -static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) -{ - NextToken result; - XMLCHAR ch; - XMLCHAR chTemp; - int indexStart,nFoundMatch,nIsText=FALSE; - result.pClr=NULL; // prevent warning - - // Find next non-white space character - do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); - - if (ch) - { - // Cache the current string pointer - result.pStr = &pXML->lpXML[indexStart]; - - // First check whether the token is in the clear tag list (meaning it - // does not need formatting). 
- ALLXMLClearTag *ctag=XMLClearTags; - do - { - if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0) - { - result.pClr=ctag; - pXML->nIndex+=ctag->openTagLen-1; - *pType=eTokenClear; - return result; - } - ctag++; - } while(ctag->lpszOpen); - - // If we didn't find a clear tag then check for standard tokens - switch(ch) - { - // Check for quotes - case _CXML('\''): - case _CXML('\"'): - // Type of token - *pType = eTokenQuotedText; - chTemp = ch; - - // Set the size - nFoundMatch = FALSE; - - // Search through the string to find a matching quote - while((ch = getNextChar(pXML))) - { - if (ch==chTemp) { nFoundMatch = TRUE; break; } - if (ch==_CXML('<')) break; - } +static NextToken GetNextToken(XML *pXML, int *pcbToken, + enum XMLTokenTypeTag *pType) { + NextToken result; + XMLCHAR ch; + XMLCHAR chTemp; + int indexStart, nFoundMatch, nIsText = FALSE; + result.pClr = NULL; // prevent warning + + // Find next non-white space character + do { + indexStart = pXML->nIndex; + ch = getNextChar(pXML); + } while XML_isSPACECHAR(ch); + + if (ch) { + // Cache the current string pointer + result.pStr = &pXML->lpXML[indexStart]; + + // First check whether the token is in the clear tag list (meaning it + // does not need formatting). 
+ ALLXMLClearTag *ctag = XMLClearTags; + do { + if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen) == 0) { + result.pClr = ctag; + pXML->nIndex += ctag->openTagLen - 1; + *pType = eTokenClear; + return result; + } + ctag++; + } while (ctag->lpszOpen); + + // If we didn't find a clear tag then check for standard tokens + switch (ch) { + // Check for quotes + case _CXML('\''): + case _CXML('\"'): + // Type of token + *pType = eTokenQuotedText; + chTemp = ch; + + // Set the size + nFoundMatch = FALSE; + + // Search through the string to find a matching quote + while ((ch = getNextChar(pXML))) { + if (ch == chTemp) { + nFoundMatch = TRUE; + break; + } + if (ch == _CXML('<')) + break; + } - // If we failed to find a matching quote - if (nFoundMatch == FALSE) - { - pXML->nIndex=indexStart+1; - nIsText=TRUE; - break; - } + // If we failed to find a matching quote + if (nFoundMatch == FALSE) { + pXML->nIndex = indexStart + 1; + nIsText = TRUE; + break; + } -// 4.02.2002 -// if (FindNonWhiteSpace(pXML)) pXML->nIndex--; + // 4.02.2002 + // if (FindNonWhiteSpace(pXML)) pXML->nIndex--; - break; + break; - // Equals (used with attribute values) - case _CXML('='): - *pType = eTokenEquals; - break; + // Equals (used with attribute values) + case _CXML('='): + *pType = eTokenEquals; + break; - // Close tag - case _CXML('>'): - *pType = eTokenCloseTag; - break; + // Close tag + case _CXML('>'): + *pType = eTokenCloseTag; + break; - // Check for tag start and tag end - case _CXML('<'): + // Check for tag start and tag end + case _CXML('<'): - // Peek at the next character to see if we have an end tag 'lpXML[pXML->nIndex]; + // Peek at the next character to see if we have an end tag 'lpXML[pXML->nIndex]; - // If we have a tag end... - if (chTemp == _CXML('/')) - { - // Set the type and ensure we point at the next character - getNextChar(pXML); - *pType = eTokenTagEnd; - } + // If we have a tag end... 
+ if (chTemp == _CXML('/')) { + // Set the type and ensure we point at the next character + getNextChar(pXML); + *pType = eTokenTagEnd; + } - // If we have an XML declaration tag - else if (chTemp == _CXML('?')) - { + // If we have an XML declaration tag + else if (chTemp == _CXML('?')) { - // Set the type and ensure we point at the next character - getNextChar(pXML); - *pType = eTokenDeclaration; - } + // Set the type and ensure we point at the next character + getNextChar(pXML); + *pType = eTokenDeclaration; + } - // Otherwise we must have a start tag - else - { - *pType = eTokenTagStart; - } - break; + // Otherwise we must have a start tag + else { + *pType = eTokenTagStart; + } + break; - // Check to see if we have a short hand type end tag ('/>'). - case _CXML('/'): + // Check to see if we have a short hand type end tag ('/>'). + case _CXML('/'): - // Peek at the next character to see if we have a short end tag '/>' - chTemp = pXML->lpXML[pXML->nIndex]; + // Peek at the next character to see if we have a short end tag '/>' + chTemp = pXML->lpXML[pXML->nIndex]; - // If we have a short hand end tag... - if (chTemp == _CXML('>')) - { - // Set the type and ensure we point at the next character - getNextChar(pXML); - *pType = eTokenShortHandClose; - break; - } + // If we have a short hand end tag... 
+ if (chTemp == _CXML('>')) { + // Set the type and ensure we point at the next character + getNextChar(pXML); + *pType = eTokenShortHandClose; + break; + } - // If we haven't found a short hand closing tag then drop into the - // text process + // If we haven't found a short hand closing tag then drop into the + // text process - // Other characters - default: - nIsText = TRUE; - } + // Other characters + default: + nIsText = TRUE; + } - // If this is a TEXT node - if (nIsText) - { - // Indicate we are dealing with text - *pType = eTokenText; - while((ch = getNextChar(pXML))) - { - if XML_isSPACECHAR(ch) - { - indexStart++; break; - - } else if (ch==_CXML('/')) - { - // If we find a slash then this maybe text or a short hand end tag - // Peek at the next character to see it we have short hand end tag - ch=pXML->lpXML[pXML->nIndex]; - // If we found a short hand end tag then we need to exit the loop - if (ch==_CXML('>')) { pXML->nIndex--; break; } - - } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('='))) - { - pXML->nIndex--; break; - } - } + // If this is a TEXT node + if (nIsText) { + // Indicate we are dealing with text + *pType = eTokenText; + while ((ch = getNextChar(pXML))) { + if XML_isSPACECHAR (ch) { + indexStart++; + break; + + } else if (ch == _CXML('/')) { + // If we find a slash then this maybe text or a short hand end tag + // Peek at the next character to see it we have short hand end tag + ch = pXML->lpXML[pXML->nIndex]; + // If we found a short hand end tag then we need to exit the loop + if (ch == _CXML('>')) { + pXML->nIndex--; + break; + } + + } else if ((ch == _CXML('<')) || (ch == _CXML('>')) || + (ch == _CXML('='))) { + pXML->nIndex--; + break; } - *pcbToken = pXML->nIndex-indexStart; - } else - { - // If we failed to obtain a valid character - *pcbToken = 0; - *pType = eTokenError; - result.pStr=NULL; + } } + *pcbToken = pXML->nIndex - indexStart; + } else { + // If we failed to obtain a valid character + *pcbToken = 0; + *pType = 
eTokenError; + result.pStr = NULL; + } - return result; + return result; } -XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) -{ - if (!d) { free(lpszName); return NULL; } - if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName); - d->lpszName=lpszName; - return lpszName; +XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) { + if (!d) { + free(lpszName); + return NULL; + } + if (d->lpszName && (lpszName != d->lpszName)) + free((void *)d->lpszName); + d->lpszName = lpszName; + return lpszName; } // private: -XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; } -XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) -{ - d=(XMLNodeData*)malloc(sizeof(XMLNodeData)); - d->ref_count=1; +XMLNode::XMLNode(struct XMLNodeDataTag *p) { + d = p; + (p->ref_count)++; +} +XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) { + d = (XMLNodeData *)malloc(sizeof(XMLNodeData)); + d->ref_count = 1; - d->lpszName=NULL; - d->nChild= 0; - d->nText = 0; - d->nClear = 0; - d->nAttribute = 0; + d->lpszName = NULL; + d->nChild = 0; + d->nText = 0; + d->nClear = 0; + d->nAttribute = 0; - d->isDeclaration = isDeclaration; + d->isDeclaration = isDeclaration; - d->pParent = pParent; - d->pChild= NULL; - d->pText= NULL; - d->pClear= NULL; - d->pAttribute= NULL; - d->pOrder= NULL; + d->pParent = pParent; + d->pChild = NULL; + d->pText = NULL; + d->pClear = NULL; + d->pAttribute = NULL; + d->pOrder = NULL; - updateName_WOSD(lpszName); + updateName_WOSD(lpszName); } -XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); } -XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); } +XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { + return XMLNode(NULL, lpszName, isDeclaration); +} +XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { + 
return XMLNode(NULL, stringDup(lpszName), isDeclaration); +} #define MEMORYINCREASE 50 -static inline void myFree(void *p) { if (p) free(p); } -static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) -{ - if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); } - if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem); -// if (!p) -// { -// printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220); -// } - return p; +static inline void myFree(void *p) { + if (p) + free(p); +} +static inline void *myRealloc(void *p, int newsize, int memInc, + int sizeofElem) { + if (p == NULL) { + if (memInc) + return malloc(memInc * sizeofElem); + return malloc(sizeofElem); + } + if ((memInc == 0) || ((newsize % memInc) == 0)) + p = realloc(p, (newsize + memInc) * sizeofElem); + // if (!p) + // { + // printf("XMLParser Error: Not enough memory! Aborting...\n"); + // exit(220); + // } + return p; } // private: -XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) -{ - if (index<0) return -1; - int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i; +XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, + XMLElementType xxtype) { + if (index < 0) + return -1; + int i = 0, j = (int)((index << 2) + xxtype), *o = d->pOrder; + while (o[i] != j) + i++; + return i; } // private: // update "order" information when deleting a content of a XMLNode -int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) -{ - int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t); - memmove(o+i, o+i+1, (n-i)*sizeof(int)); - for (;ipOrder=(int)realloc(d->pOrder,n*sizeof(int)); - // but we skip reallocation because it's too time consuming. - // Anyway, at the end, it will be free'd completely at once. 
- return i; -} - -void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype) -{ - // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") - // out: *_pos is the index inside p - p=myRealloc(p,(nc+1),memoryIncrease,size); - int n=d->nChild+d->nText+d->nClear; - d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int)); - int pos=*_pos,*o=d->pOrder; - - if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } +int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) { + int n = d->nChild + d->nText + d->nClear, *o = d->pOrder, + i = findPosition(d, index, t); + memmove(o + i, o + i + 1, (n - i) * sizeof(int)); + for (; i < n; i++) + if ((o[i] & 3) == (int)t) + o[i] -= 4; + // We should normally do: + // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int)); + // but we skip reallocation because it's too time consuming. + // Anyway, at the end, it will be free'd completely at once. + return i; +} + +void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p, + int size, XMLElementType xtype) { + // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") + // out: *_pos is the index inside p + p = myRealloc(p, (nc + 1), memoryIncrease, size); + int n = d->nChild + d->nText + d->nClear; + d->pOrder = + (int *)myRealloc(d->pOrder, n + 1, memoryIncrease * 3, sizeof(int)); + int pos = *_pos, *o = d->pOrder; + + if ((pos < 0) || (pos >= n)) { + *_pos = nc; + o[n] = (int)((nc << 2) + xtype); + return p; + } - int i=pos; - memmove(o+i+1, o+i, (n-i)*sizeof(int)); + int i = pos; + memmove(o + i + 1, o + i, (n - i) * sizeof(int)); - while ((pos>2; - memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size); + *_pos = pos = o[pos] >> 2; + memmove(((char *)p) + (pos + 1) * size, ((char *)p) + pos * size, + (nc - pos) * size); - return p; + return p; } // Add a child node to the given element. 
-XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos) -{ - if (!lpszName) return emptyXMLNode; - d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=NULL; - d->pChild[pos]=XMLNode(d,lpszName,isDeclaration); - d->nChild++; - return d->pChild[pos]; +XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, + char isDeclaration, int pos) { + if (!lpszName) + return emptyXMLNode; + d->pChild = (XMLNode *)addToOrder(memoryIncrease, &pos, d->nChild, d->pChild, + sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = NULL; + d->pChild[pos] = XMLNode(d, lpszName, isDeclaration); + d->nChild++; + return d->pChild[pos]; } // Add an attribute to an element. -XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev) -{ - if (!lpszName) return &emptyXMLAttribute; - if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; } - int nc=d->nAttribute; - d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute)); - XMLAttribute *pAttr=d->pAttribute+nc; - pAttr->lpszName = lpszName; - pAttr->lpszValue = lpszValuev; - d->nAttribute++; - return pAttr; +XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, XMLSTR lpszName, + XMLSTR lpszValuev) { + if (!lpszName) + return &emptyXMLAttribute; + if (!d) { + myFree(lpszName); + myFree(lpszValuev); + return &emptyXMLAttribute; + } + int nc = d->nAttribute; + d->pAttribute = (XMLAttribute *)myRealloc( + d->pAttribute, (nc + 1), memoryIncrease, sizeof(XMLAttribute)); + XMLAttribute *pAttr = d->pAttribute + nc; + pAttr->lpszName = lpszName; + pAttr->lpszValue = lpszValuev; + d->nAttribute++; + return pAttr; } // Add text to the element. 
-XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) -{ - if (!lpszValue) return NULL; - if (!d) { myFree(lpszValue); return NULL; } - d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText); - d->pText[pos]=lpszValue; - d->nText++; - return lpszValue; +XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) { + if (!lpszValue) + return NULL; + if (!d) { + myFree(lpszValue); + return NULL; + } + d->pText = (XMLCSTR *)addToOrder(memoryIncrease, &pos, d->nText, d->pText, + sizeof(XMLSTR), eNodeText); + d->pText[pos] = lpszValue; + d->nText++; + return lpszValue; } // Add clear (unformatted) text to the element. -XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) -{ - if (!lpszValue) return &emptyXMLClear; - if (!d) { myFree(lpszValue); return &emptyXMLClear; } - d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear); - XMLClear *pNewClear=d->pClear+pos; - pNewClear->lpszValue = lpszValue; - if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen; - if (!lpszClose) lpszClose=XMLClearTags->lpszClose; - pNewClear->lpszOpenTag = lpszOpen; - pNewClear->lpszCloseTag = lpszClose; - d->nClear++; - return pNewClear; +XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, + XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) { + if (!lpszValue) + return &emptyXMLClear; + if (!d) { + myFree(lpszValue); + return &emptyXMLClear; + } + d->pClear = (XMLClear *)addToOrder(memoryIncrease, &pos, d->nClear, d->pClear, + sizeof(XMLClear), eNodeClear); + XMLClear *pNewClear = d->pClear + pos; + pNewClear->lpszValue = lpszValue; + if (!lpszOpen) + lpszOpen = XMLClearTags->lpszOpen; + if (!lpszClose) + lpszClose = XMLClearTags->lpszClose; + pNewClear->lpszOpenTag = lpszOpen; + pNewClear->lpszCloseTag = lpszClose; + d->nClear++; + return pNewClear; } // private: // Parse a clear (unformatted) 
type node. -char XMLNode::parseClearTag(void *px, void *_pClear) -{ - XML *pXML=(XML *)px; - ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear); - int cbTemp=0; - XMLCSTR lpszTemp=NULL; - XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex]; - static XMLCSTR docTypeEnd=_CXML("]>"); - - // Find the closing tag - // Seems the ')) { lpszTemp=pCh; break; } +char XMLNode::parseClearTag(void *px, void *_pClear) { + XML *pXML = (XML *)px; + ALLXMLClearTag pClear = *((ALLXMLClearTag *)_pClear); + int cbTemp = 0; + XMLCSTR lpszTemp = NULL; + XMLCSTR lpXML = &pXML->lpXML[pXML->nIndex]; + static XMLCSTR docTypeEnd = _CXML("]>"); + + // Find the closing tag + // Seems the ')) { + lpszTemp = pCh; + break; + } #ifdef _XMLWIDECHAR - pCh++; + pCh++; #else - pCh+=XML_ByteTable[(unsigned char)(*pCh)]; + pCh += XML_ByteTable[(unsigned char)(*pCh)]; #endif - } - } else lpszTemp=xstrstr(lpXML, pClear.lpszClose); - - if (lpszTemp) - { - // Cache the size and increment the index - cbTemp = (int)(lpszTemp - lpXML); - - pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose); - - // Add the clear node to the current element - addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1); - return 0; } + } else + lpszTemp = xstrstr(lpXML, pClear.lpszClose); - // If we failed to find the end tag - pXML->error = eXMLErrorUnmatchedEndClearTag; - return 1; -} + if (lpszTemp) { + // Cache the size and increment the index + cbTemp = (int)(lpszTemp - lpXML); -void XMLNode::exactMemory(XMLNodeData *d) -{ - if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int)); - if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode)); - if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute)); - if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR)); - if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear)); -} + pXML->nIndex += cbTemp + 
(int)xstrlen(pClear.lpszClose); -char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) -{ - XML *pXML=(XML *)pa; - XMLCSTR lpszText=pXML->lpszText; - if (!lpszText) return 0; - if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++; - int cbText = (int)(tokenPStr - lpszText); - if (!cbText) { pXML->lpszText=NULL; return 0; } - if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; } - if (!cbText) { pXML->lpszText=NULL; return 0; } - XMLSTR lpt=fromXMLString(lpszText,cbText,pXML); - if (!lpt) return 1; - pXML->lpszText=NULL; - if (removeCommentsInMiddleOfText && d->nText && d->nClear) - { - // if the previous insertion was a comment () AND - // if the previous previous insertion was a text then, delete the comment and append the text - int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder; - if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText)) - { - int i=o[n]>>2; - if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen) - { - deleteClear(i); - i=o[n-1]>>2; - n=xstrlen(d->pText[i]); - int n2=xstrlen(lpt)+1; - d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR)); - if (!d->pText[i]) return 1; - memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR)); - free(lpt); - return 0; - } - } - } - addText_priv(MEMORYINCREASE,lpt,-1); + // Add the clear node to the current element + addClear_priv(MEMORYINCREASE, stringDup(lpXML, cbTemp), pClear.lpszOpen, + pClear.lpszClose, -1); + return 0; + } + + // If we failed to find the end tag + pXML->error = eXMLErrorUnmatchedEndClearTag; + return 1; +} + +void XMLNode::exactMemory(XMLNodeData *d) { + if (d->pOrder) + d->pOrder = (int *)realloc(d->pOrder, (d->nChild + d->nText + d->nClear) * + sizeof(int)); + if (d->pChild) + d->pChild = (XMLNode *)realloc(d->pChild, d->nChild * sizeof(XMLNode)); + if (d->pAttribute) + d->pAttribute = (XMLAttribute *)realloc( + d->pAttribute, d->nAttribute * sizeof(XMLAttribute)); + if (d->pText) + d->pText 
= (XMLCSTR *)realloc(d->pText, d->nText * sizeof(XMLSTR)); + if (d->pClear) + d->pClear = (XMLClear *)realloc(d->pClear, d->nClear * sizeof(XMLClear)); +} + +char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) { + XML *pXML = (XML *)pa; + XMLCSTR lpszText = pXML->lpszText; + if (!lpszText) + return 0; + if (dropWhiteSpace) + while (XML_isSPACECHAR(*lpszText) && (lpszText != tokenPStr)) + lpszText++; + int cbText = (int)(tokenPStr - lpszText); + if (!cbText) { + pXML->lpszText = NULL; return 0; + } + if (dropWhiteSpace) { + cbText--; + while ((cbText) && XML_isSPACECHAR(lpszText[cbText])) + cbText--; + cbText++; + } + if (!cbText) { + pXML->lpszText = NULL; + return 0; + } + XMLSTR lpt = fromXMLString(lpszText, cbText, pXML); + if (!lpt) + return 1; + pXML->lpszText = NULL; + if (removeCommentsInMiddleOfText && d->nText && d->nClear) { + // if the previous insertion was a comment () AND + // if the previous previous insertion was a text then, delete the comment + // and append the text + int n = d->nChild + d->nText + d->nClear - 1, *o = d->pOrder; + if (((o[n] & 3) == eNodeClear) && ((o[n - 1] & 3) == eNodeText)) { + int i = o[n] >> 2; + if (d->pClear[i].lpszOpenTag == XMLClearTags[2].lpszOpen) { + deleteClear(i); + i = o[n - 1] >> 2; + n = xstrlen(d->pText[i]); + int n2 = xstrlen(lpt) + 1; + d->pText[i] = + (XMLSTR)realloc((void *)d->pText[i], (n + n2) * sizeof(XMLCHAR)); + if (!d->pText[i]) + return 1; + memcpy((void *)(d->pText[i] + n), lpt, n2 * sizeof(XMLCHAR)); + free(lpt); + return 0; + } + } + } + addText_priv(MEMORYINCREASE, lpt, -1); + return 0; } // private: // Recursively parse an XML element. 
-int XMLNode::ParseXMLElement(void *pa) -{ - XML *pXML=(XML *)pa; - int cbToken; - enum XMLTokenTypeTag xtype; - NextToken token; - XMLCSTR lpszTemp=NULL; - int cbTemp=0; - char nDeclaration; - XMLNode pNew; - enum Status status; // inside or outside a tag - enum Attrib attrib = eAttribName; - - assert(pXML); - - // If this is the first call to the function - if (pXML->nFirst) - { - // Assume we are outside of a tag definition - pXML->nFirst = FALSE; - status = eOutsideTag; - } else - { - // If this is not the first call then we should only be called when inside a tag. - status = eInsideTag; - } - - // Iterate through the tokens in the document - for(;;) - { - // Obtain the next token - token = GetNextToken(pXML, &cbToken, &xtype); - - if (xtype != eTokenError) - { - // Check the current status - switch(status) - { - - // If we are outside of a tag definition - case eOutsideTag: - - // Check what type of token we obtained - switch(xtype) - { - // If we have found text or quoted text - case eTokenText: - case eTokenCloseTag: /* '>' */ - case eTokenShortHandClose: /* '/>' */ - case eTokenQuotedText: - case eTokenEquals: - break; - - // If we found a start tag '<' and declarations 'error = eXMLErrorMissingTagName; - return FALSE; - } - - // If we found a new element which is the same as this - // element then we need to pass this back to the caller.. +int XMLNode::ParseXMLElement(void *pa) { + XML *pXML = (XML *)pa; + int cbToken; + enum XMLTokenTypeTag xtype; + NextToken token; + XMLCSTR lpszTemp = NULL; + int cbTemp = 0; + char nDeclaration; + XMLNode pNew; + enum Status status; // inside or outside a tag + enum Attrib attrib = eAttribName; + + assert(pXML); + + // If this is the first call to the function + if (pXML->nFirst) { + // Assume we are outside of a tag definition + pXML->nFirst = FALSE; + status = eOutsideTag; + } else { + // If this is not the first call then we should only be called when inside a + // tag. 
+ status = eInsideTag; + } + + // Iterate through the tokens in the document + for (;;) { + // Obtain the next token + token = GetNextToken(pXML, &cbToken, &xtype); + + if (xtype != eTokenError) { + // Check the current status + switch (status) { + + // If we are outside of a tag definition + case eOutsideTag: + + // Check what type of token we obtained + switch (xtype) { + // If we have found text or quoted text + case eTokenText: + case eTokenCloseTag: /* '>' */ + case eTokenShortHandClose: /* '/>' */ + case eTokenQuotedText: + case eTokenEquals: + break; + + // If we found a start tag '<' and declarations 'error = eXMLErrorMissingTagName; + return FALSE; + } + + // If we found a new element which is the same as this + // element then we need to pass this back to the caller.. #ifdef APPROXIMATE_PARSING - if (d->lpszName && - myTagCompare(d->lpszName, token.pStr) == 0) - { - // Indicate to the caller that it needs to create a - // new element. - pXML->lpNewElement = token.pStr; - pXML->cbNewElement = cbToken; - return TRUE; - } else + if (d->lpszName && myTagCompare(d->lpszName, token.pStr) == 0) { + // Indicate to the caller that it needs to create a + // new element. + pXML->lpNewElement = token.pStr; + pXML->cbNewElement = cbToken; + return TRUE; + } else #endif - { - // If the name of the new element differs from the name of - // the current element we need to add the new element to - // the current one and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1); - - while (!pNew.isEmpty()) - { - // Callself to process the new node. If we return - // FALSE this means we dont have any more - // processing to do... 
- - if (!pNew.ParseXMLElement(pXML)) return FALSE; - else - { - // If the call to recurse this function - // evented in a end tag specified in XML then - // we need to unwind the calls to this - // function until we find the appropriate node - // (the element name and end tag name must - // match) - if (pXML->cbEndTag) - { - // If we are back at the root node then we - // have an unmatched end tag - if (!d->lpszName) - { - pXML->error=eXMLErrorUnmatchedEndTag; - return FALSE; - } - - // If the end tag matches the name of this - // element then we only need to unwind - // once more... - - if (myTagCompare(d->lpszName, pXML->lpEndTag)==0) - { - pXML->cbEndTag = 0; - } - - return TRUE; - } else - if (pXML->cbNewElement) - { - // If the call indicated a new element is to - // be created on THIS element. - - // If the name of this element matches the - // name of the element we need to create - // then we need to return to the caller - // and let it process the element. - - if (myTagCompare(d->lpszName, pXML->lpNewElement)==0) - { - return TRUE; - } - - // Add the new element and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1); - pXML->cbNewElement = 0; - } - else - { - // If we didn't have a new element to create - pNew = emptyXMLNode; - - } - } - } - } - break; + { + // If the name of the new element differs from the name of + // the current element we need to add the new element to + // the current one and recurse + pNew = addChild_priv(MEMORYINCREASE, + stringDup(token.pStr, cbToken), + nDeclaration, -1); + + while (!pNew.isEmpty()) { + // Callself to process the new node. If we return + // FALSE this means we dont have any more + // processing to do... 
+ + if (!pNew.ParseXMLElement(pXML)) + return FALSE; + else { + // If the call to recurse this function + // evented in a end tag specified in XML then + // we need to unwind the calls to this + // function until we find the appropriate node + // (the element name and end tag name must + // match) + if (pXML->cbEndTag) { + // If we are back at the root node then we + // have an unmatched end tag + if (!d->lpszName) { + pXML->error = eXMLErrorUnmatchedEndTag; + return FALSE; + } - // If we found an end tag - case eTokenTagEnd: + // If the end tag matches the name of this + // element then we only need to unwind + // once more... - // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; + if (myTagCompare(d->lpszName, pXML->lpEndTag) == 0) { + pXML->cbEndTag = 0; + } - // Find the name of the end tag - token = GetNextToken(pXML, &cbTemp, &xtype); + return TRUE; + } else if (pXML->cbNewElement) { + // If the call indicated a new element is to + // be created on THIS element. - // The end tag should be text - if (xtype != eTokenText) - { - pXML->error = eXMLErrorMissingEndTagName; - return FALSE; - } - lpszTemp = token.pStr; + // If the name of this element matches the + // name of the element we need to create + // then we need to return to the caller + // and let it process the element. - // After the end tag we should find a closing tag - token = GetNextToken(pXML, &cbToken, &xtype); - if (xtype != eTokenCloseTag) - { - pXML->error = eXMLErrorMissingEndTagName; - return FALSE; + if (myTagCompare(d->lpszName, pXML->lpNewElement) == 0) { + return TRUE; + } + + // Add the new element and recurse + pNew = addChild_priv( + MEMORYINCREASE, + stringDup(pXML->lpNewElement, pXML->cbNewElement), 0, + -1); + pXML->cbNewElement = 0; + } else { + // If we didn't have a new element to create + pNew = emptyXMLNode; } - pXML->lpszText=pXML->lpXML+pXML->nIndex; - - // We need to return to the previous caller. 
If the name - // of the tag cannot be found we need to keep returning to - // caller until we find a match - if (myTagCompare(d->lpszName, lpszTemp) != 0) + } + } + } + break; + + // If we found an end tag + case eTokenTagEnd: + + // If we have node text then add this to the element + if (maybeAddTxT(pXML, token.pStr)) + return FALSE; + + // Find the name of the end tag + token = GetNextToken(pXML, &cbTemp, &xtype); + + // The end tag should be text + if (xtype != eTokenText) { + pXML->error = eXMLErrorMissingEndTagName; + return FALSE; + } + lpszTemp = token.pStr; + + // After the end tag we should find a closing tag + token = GetNextToken(pXML, &cbToken, &xtype); + if (xtype != eTokenCloseTag) { + pXML->error = eXMLErrorMissingEndTagName; + return FALSE; + } + pXML->lpszText = pXML->lpXML + pXML->nIndex; + + // We need to return to the previous caller. If the name + // of the tag cannot be found we need to keep returning to + // caller until we find a match + if (myTagCompare(d->lpszName, lpszTemp) != 0) #ifdef STRICT_PARSING - { - pXML->error=eXMLErrorUnmatchedEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; - return FALSE; - } + { + pXML->error = eXMLErrorUnmatchedEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; + return FALSE; + } #else - { - pXML->error=eXMLErrorMissingEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; - pXML->lpEndTag = lpszTemp; - pXML->cbEndTag = cbTemp; - } + { + pXML->error = eXMLErrorMissingEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; + pXML->lpEndTag = lpszTemp; + pXML->cbEndTag = cbTemp; + } #endif - // Return to the caller + // Return to the caller + exactMemory(d); + return TRUE; + + // If we found a clear (unformatted) token + case eTokenClear: + // If we have node text then add this to the element + if (maybeAddTxT(pXML, token.pStr)) + return FALSE; + if (parseClearTag(pXML, token.pClr)) + return FALSE; + pXML->lpszText = pXML->lpXML + pXML->nIndex; + break; + + default: + break; + } + break; + + // If we are inside a tag 
definition we need to search for attributes + case eInsideTag: + + // Check what part of the attribute (name, equals, value) we + // are looking for. + switch (attrib) { + // If we are looking for a new attribute + case eAttribName: + + // Check what the current token type is + switch (xtype) { + // If the current type is text... + // Eg. 'attribute' + case eTokenText: + // Cache the token then indicate that we are next to + // look for the equals + lpszTemp = token.pStr; + cbTemp = cbToken; + attrib = eAttribEquals; + break; + + // If we found a closing tag... + // Eg. '>' + case eTokenCloseTag: + // We are now outside the tag + status = eOutsideTag; + pXML->lpszText = pXML->lpXML + pXML->nIndex; + break; + + // If we found a short hand '/>' closing tag then we can + // return to the caller + case eTokenShortHandClose: + exactMemory(d); + pXML->lpszText = pXML->lpXML + pXML->nIndex; + return TRUE; + + // Errors... + case eTokenQuotedText: /* '"SomeText"' */ + case eTokenTagStart: /* '<' */ + case eTokenTagEnd: /* 'error = eXMLErrorUnexpectedToken; + return FALSE; + default: + break; + } + break; + + // If we are looking for an equals + case eAttribEquals: + // Check what the current token type is + switch (xtype) { + // If the current type is text... + // Eg. 'Attribute AnotherAttribute' + case eTokenText: + // Add the unvalued attribute to the list + addAttribute_priv(MEMORYINCREASE, stringDup(lpszTemp, cbTemp), + NULL); + // Cache the token then indicate. 
We are next to + // look for the equals attribute + lpszTemp = token.pStr; + cbTemp = cbToken; + break; + + // If we found a closing tag 'Attribute >' or a short hand + // closing tag 'Attribute />' + case eTokenShortHandClose: + case eTokenCloseTag: + // If we are a declaration element 'lpszText = pXML->lpXML + pXML->nIndex; + + if (d->isDeclaration && + (lpszTemp[cbTemp - 1]) == _CXML('?')) { + cbTemp--; + if (d->pParent && d->pParent->pParent) + xtype = eTokenShortHandClose; + } + + if (cbTemp) { + // Add the unvalued attribute to the list + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), NULL); + } + + // If this is the end of the tag then return to the caller + if (xtype == eTokenShortHandClose) { exactMemory(d); return TRUE; + } - // If we found a clear (unformatted) token - case eTokenClear: - // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; - if (parseClearTag(pXML, token.pClr)) return FALSE; - pXML->lpszText=pXML->lpXML+pXML->nIndex; - break; + // We are now outside the tag + status = eOutsideTag; + break; + // If we found the equals token... + // Eg. 'Attribute =' + case eTokenEquals: + // Indicate that we next need to search for the value + // for the attribute + attrib = eAttribValue; + break; + + // Errors... + case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/ + case eTokenTagStart: /* 'Attribute <' */ + case eTokenTagEnd: /* 'Attribute error = eXMLErrorUnexpectedToken; + return FALSE; default: - break; - } - break; - - // If we are inside a tag definition we need to search for attributes - case eInsideTag: - - // Check what part of the attribute (name, equals, value) we - // are looking for. - switch(attrib) - { - // If we are looking for a new attribute - case eAttribName: - - // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 
'attribute' - case eTokenText: - // Cache the token then indicate that we are next to - // look for the equals - lpszTemp = token.pStr; - cbTemp = cbToken; - attrib = eAttribEquals; - break; - - // If we found a closing tag... - // Eg. '>' - case eTokenCloseTag: - // We are now outside the tag - status = eOutsideTag; - pXML->lpszText=pXML->lpXML+pXML->nIndex; - break; - - // If we found a short hand '/>' closing tag then we can - // return to the caller - case eTokenShortHandClose: - exactMemory(d); - pXML->lpszText=pXML->lpXML+pXML->nIndex; - return TRUE; - - // Errors... - case eTokenQuotedText: /* '"SomeText"' */ - case eTokenTagStart: /* '<' */ - case eTokenTagEnd: /* 'error = eXMLErrorUnexpectedToken; - return FALSE; - default: break; - } - break; - - // If we are looking for an equals - case eAttribEquals: - // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 'Attribute AnotherAttribute' - case eTokenText: - // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); - // Cache the token then indicate. We are next to - // look for the equals attribute - lpszTemp = token.pStr; - cbTemp = cbToken; - break; - - // If we found a closing tag 'Attribute >' or a short hand - // closing tag 'Attribute />' - case eTokenShortHandClose: - case eTokenCloseTag: - // If we are a declaration element 'lpszText=pXML->lpXML+pXML->nIndex; - - if (d->isDeclaration && - (lpszTemp[cbTemp-1]) == _CXML('?')) - { - cbTemp--; - if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose; - } - - if (cbTemp) - { - // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); - } - - // If this is the end of the tag then return to the caller - if (xtype == eTokenShortHandClose) - { - exactMemory(d); - return TRUE; - } - - // We are now outside the tag - status = eOutsideTag; - break; - - // If we found the equals token... 
- // Eg. 'Attribute =' - case eTokenEquals: - // Indicate that we next need to search for the value - // for the attribute - attrib = eAttribValue; - break; - - // Errors... - case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/ - case eTokenTagStart: /* 'Attribute <' */ - case eTokenTagEnd: /* 'Attribute error = eXMLErrorUnexpectedToken; - return FALSE; - default: break; + break; + } + break; + + // If we are looking for an attribute value + case eAttribValue: + // Check what the current token type is + switch (xtype) { + // If the current type is text or quoted text... + // Eg. 'Attribute = "Value"' or 'Attribute = Value' or + // 'Attribute = 'Value''. + case eTokenText: + case eTokenQuotedText: + // If we are a declaration element 'isDeclaration && + (token.pStr[cbToken - 1]) == _CXML('?')) { + cbToken--; + } + + if (cbTemp) { + // Add the valued attribute to the list + if (xtype == eTokenQuotedText) { + token.pStr++; + cbToken -= 2; } - break; - - // If we are looking for an attribute value - case eAttribValue: - // Check what the current token type is - switch(xtype) - { - // If the current type is text or quoted text... - // Eg. 'Attribute = "Value"' or 'Attribute = Value' or - // 'Attribute = 'Value''. - case eTokenText: - case eTokenQuotedText: - // If we are a declaration element 'isDeclaration && - (token.pStr[cbToken-1]) == _CXML('?')) - { - cbToken--; - } - - if (cbTemp) - { - // Add the valued attribute to the list - if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; } - XMLSTR attrVal=(XMLSTR)token.pStr; - if (attrVal) - { - attrVal=fromXMLString(attrVal,cbToken,pXML); - if (!attrVal) return FALSE; - } - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal); - } - - // Indicate we are searching for a new attribute - attrib = eAttribName; - break; - - // Errors... 
- case eTokenTagStart: /* 'Attr = <' */ - case eTokenTagEnd: /* 'Attr = ' */ - case eTokenShortHandClose: /* "Attr = />" */ - case eTokenEquals: /* 'Attr = =' */ - case eTokenDeclaration: /* 'Attr = error = eXMLErrorUnexpectedToken; + XMLSTR attrVal = (XMLSTR)token.pStr; + if (attrVal) { + attrVal = fromXMLString(attrVal, cbToken, pXML); + if (!attrVal) return FALSE; - break; - default: break; } - } - } - } - // If we failed to obtain the next token - else - { - if ((!d->isDeclaration)&&(d->pParent)) - { + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), attrVal); + } + + // Indicate we are searching for a new attribute + attrib = eAttribName; + break; + + // Errors... + case eTokenTagStart: /* 'Attr = <' */ + case eTokenTagEnd: /* 'Attr = ' */ + case eTokenShortHandClose: /* "Attr = />" */ + case eTokenEquals: /* 'Attr = =' */ + case eTokenDeclaration: /* 'Attr = error = eXMLErrorUnexpectedToken; + return FALSE; + break; + default: + break; + } + } + } + } + // If we failed to obtain the next token + else { + if ((!d->isDeclaration) && (d->pParent)) { #ifdef STRICT_PARSING - pXML->error=eXMLErrorUnmatchedEndTag; + pXML->error = eXMLErrorUnmatchedEndTag; #else - pXML->error=eXMLErrorMissingEndTag; + pXML->error = eXMLErrorMissingEndTag; #endif - pXML->nIndexMissigEndTag=pXML->nIndex; - } - maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex); - return FALSE; - } + pXML->nIndexMissigEndTag = pXML->nIndex; + } + maybeAddTxT(pXML, pXML->lpXML + pXML->nIndex); + return FALSE; } + } } // Count the number of lines and columns in an XML string. 
-static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) -{ - XMLCHAR ch; - assert(lpXML); - assert(pResults); - - struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; - - pResults->nLine = 1; - pResults->nColumn = 1; - while (xml.nIndexnColumn++; - else - { - pResults->nLine++; - pResults->nColumn=1; - } +static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, + XMLResults *pResults) { + XMLCHAR ch; + assert(lpXML); + assert(pResults); + + struct XML xml = {lpXML, lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE}; + + pResults->nLine = 1; + pResults->nColumn = 1; + while (xml.nIndex < nUpto) { + ch = getNextChar(&xml); + if (ch != _CXML('\n')) + pResults->nColumn++; + else { + pResults->nLine++; + pResults->nColumn = 1; } + } } // Parse XML and return the root element. -XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) -{ - if (!lpszXML) - { - if (pResults) - { - pResults->error=eXMLErrorNoElements; - pResults->nLine=0; - pResults->nColumn=0; +XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, + XMLResults *pResults) { + if (!lpszXML) { + if (pResults) { + pResults->error = eXMLErrorNoElements; + pResults->nLine = 0; + pResults->nColumn = 0; + } + return emptyXMLNode; + } + + XMLNode xnode(NULL, NULL, FALSE); + struct XML xml = {lpszXML, lpszXML, 0, 0, eXMLErrorNone, + NULL, 0, NULL, 0, TRUE}; + + // Create header element + xnode.ParseXMLElement(&xml); + enum XMLError error = xml.error; + if (!xnode.nChildNode()) + error = eXMLErrorNoXMLTagFound; + if ((xnode.nChildNode() == 1) && (xnode.nElement() == 1)) + xnode = xnode.getChildNode(); // skip the empty node + + // If no error occurred + if ((error == eXMLErrorNone) || (error == eXMLErrorMissingEndTag) || + (error == eXMLErrorNoXMLTagFound)) { + XMLCSTR name = xnode.getName(); + if (tag && (*tag) && ((!name) || (xstricmp(name, tag)))) { + xnode = xnode.getChildNode(tag); + if (xnode.isEmpty()) { + if (pResults) { + 
pResults->error = eXMLErrorFirstTagNotFound; + pResults->nLine = 0; + pResults->nColumn = 0; } return emptyXMLNode; + } } - - XMLNode xnode(NULL,NULL,FALSE); - struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; - - // Create header element - xnode.ParseXMLElement(&xml); - enum XMLError error = xml.error; - if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound; - if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node - - // If no error occurred - if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound)) - { - XMLCSTR name=xnode.getName(); - if (tag&&(*tag)&&((!name)||(xstricmp(name,tag)))) - { - xnode=xnode.getChildNode(tag); - if (xnode.isEmpty()) - { - if (pResults) - { - pResults->error=eXMLErrorFirstTagNotFound; - pResults->nLine=0; - pResults->nColumn=0; - } - return emptyXMLNode; - } - } - } else - { - // Cleanup: this will destroy all the nodes - xnode = emptyXMLNode; + } else { + // Cleanup: this will destroy all the nodes + xnode = emptyXMLNode; + } + + // If we have been given somewhere to place results + if (pResults) { + pResults->error = error; + + // If we have an error + if (error != eXMLErrorNone) { + if (error == eXMLErrorMissingEndTag) + xml.nIndex = xml.nIndexMissigEndTag; + // Find which line and column it starts on. + CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); } - - - // If we have been given somewhere to place results + } + return xnode; +} + +XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, + XMLResults *pResults) { + if (pResults) { + pResults->nLine = 0; + pResults->nColumn = 0; + } + FILE *f = xfopen(filename, _CXML("rb")); + if (f == NULL) { if (pResults) - { - pResults->error = error; - - // If we have an error - if (error!=eXMLErrorNone) - { - if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag; - // Find which line and column it starts on. 
- CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); - } - } - return xnode; -} - -XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) -{ - if (pResults) { pResults->nLine=0; pResults->nColumn=0; } - FILE *f=xfopen(filename,_CXML("rb")); - if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } - fseek(f,0,SEEK_END); - int l=ftell(f),headerSz=0; - if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; } - fseek(f,0,SEEK_SET); - unsigned char *buf=(unsigned char*)malloc(l+4); - l=fread(buf,1,l,f); + pResults->error = eXMLErrorFileNotFound; + return emptyXMLNode; + } + fseek(f, 0, SEEK_END); + int l = ftell(f), headerSz = 0; + if (!l) { + if (pResults) + pResults->error = eXMLErrorEmpty; fclose(f); - buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0; + return emptyXMLNode; + } + fseek(f, 0, SEEK_SET); + unsigned char *buf = (unsigned char *)malloc(l + 4); + l = fread(buf, 1, l, f); + fclose(f); + buf[l] = 0; + buf[l + 1] = 0; + buf[l + 2] = 0; + buf[l + 3] = 0; #ifdef _XMLWIDECHAR - if (guessWideCharChars) - { - if (!myIsTextWideChar(buf,l)) - { - XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy; - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; } - XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; - } + if (guessWideCharChars) { + if (!myIsTextWideChar(buf, l)) { + XMLNode::XMLCharEncoding ce = XMLNode::char_encoding_legacy; + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) { + headerSz = 3; + ce = XMLNode::char_encoding_UTF8; + } + XMLSTR b2 = myMultiByteToWideChar((const char *)(buf + headerSz), ce); + free(buf); + buf = (unsigned char *)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) + headerSz = 2; + 
if ((buf[0] == 0xff) && (buf[1] == 0xfe)) + headerSz = 2; } + } #else - if (guessWideCharChars) - { - if (myIsTextWideChar(buf,l)) - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; - char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz)); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; - } + if (guessWideCharChars) { + if (myIsTextWideChar(buf, l)) { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) + headerSz = 2; + if ((buf[0] == 0xff) && (buf[1] == 0xfe)) + headerSz = 2; + char *b2 = myWideCharToMultiByte((const wchar_t *)(buf + headerSz)); + free(buf); + buf = (unsigned char *)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) + headerSz = 3; } + } #endif - if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } - XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); - free(buf); - return x; + if (!buf) { + if (pResults) + pResults->error = eXMLErrorCharConversionError; + return emptyXMLNode; + } + XMLNode x = parseString((XMLSTR)(buf + headerSz), tag, pResults); + free(buf); + return x; } -static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; } +static inline void charmemset(XMLSTR dest, XMLCHAR c, int l) { + while (l--) + *(dest++) = c; +} // private: // Creates an user friendly XML string from a given element with // appropriate white space and carriage returns. // // This recurses through all subnodes then adds contents of the nodes to the // string. 
-int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat) -{ - int nResult = 0; - int cb=nFormat<0?0:nFormat; - int cbElement; - int nChildFormat=-1; - int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear; - int i,j; - if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2; - - assert(pEntry); +int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, + int nFormat) { + int nResult = 0; + int cb = nFormat < 0 ? 0 : nFormat; + int cbElement; + int nChildFormat = -1; + int nElementI = pEntry->nChild + pEntry->nText + pEntry->nClear; + int i, j; + if ((nFormat >= 0) && (nElementI == 1) && (pEntry->nText == 1) && + (!pEntry->isDeclaration)) + nFormat = -2; + + assert(pEntry); #define LENSTR(lpsz) (lpsz ? xstrlen(lpsz) : 0) - // If the element has no name then assume this is the head node. - cbElement = (int)LENSTR(pEntry->lpszName); + // If the element has no name then assume this is the head node. + cbElement = (int)LENSTR(pEntry->lpszName); + + if (cbElement) { + // "isDeclaration) + lpszMarker[nResult++] = _CXML('?'); + xstrcpy(&lpszMarker[nResult], pEntry->lpszName); + nResult += cbElement; + lpszMarker[nResult++] = _CXML(' '); + + } else { + nResult += cbElement + 2 + cb; + if (pEntry->isDeclaration) + nResult++; + } - if (cbElement) - { - // "pAttribute; + for (i = 0; i < pEntry->nAttribute; i++) { + // "Attrib + cb = (int)LENSTR(pAttr->lpszName); + if (cb) { if (lpszMarker) - { - if (cb) charmemset(lpszMarker, INDENTCHAR, cb); - nResult = cb; - lpszMarker[nResult++]=_CXML('<'); - if (pEntry->isDeclaration) lpszMarker[nResult++]=_CXML('?'); - xstrcpy(&lpszMarker[nResult], pEntry->lpszName); - nResult+=cbElement; - lpszMarker[nResult++]=_CXML(' '); - - } else - { - nResult+=cbElement+2+cb; - if (pEntry->isDeclaration) nResult++; - } - - // Enumerate attributes and add them to the string - XMLAttribute *pAttr=pEntry->pAttribute; - for (i=0; inAttribute; i++) - { - // "Attrib - cb = 
(int)LENSTR(pAttr->lpszName); + xstrcpy(&lpszMarker[nResult], pAttr->lpszName); + nResult += cb; + // "Attrib=Value " + if (pAttr->lpszValue) { + cb = (int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); + if (lpszMarker) { + lpszMarker[nResult] = _CXML('='); + lpszMarker[nResult + 1] = _CXML('"'); if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName); - nResult += cb; - // "Attrib=Value " - if (pAttr->lpszValue) - { - cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('='); - lpszMarker[nResult+1]=_CXML('"'); - if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue); - lpszMarker[nResult+cb+2]=_CXML('"'); - } - nResult+=cb+3; - } - if (lpszMarker) lpszMarker[nResult] = _CXML(' '); - nResult++; - } - pAttr++; + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult + 2], + pAttr->lpszValue); + lpszMarker[nResult + cb + 2] = _CXML('"'); + } + nResult += cb + 3; } - - if (pEntry->isDeclaration) - { - if (lpszMarker) - { - lpszMarker[nResult-1]=_CXML('?'); - lpszMarker[nResult]=_CXML('>'); - } - nResult++; - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); - nResult++; - } - } else - // If there are child nodes we need to terminate the start tag - if (nElementI) - { - if (lpszMarker) lpszMarker[nResult-1]=_CXML('>'); - if (nFormat>=0) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); - nResult++; - } - } else nResult--; - } - - // Calculate the child format for when we recurse. This is used to - // determine the number of spaces used for prefixes. 
- if (nFormat!=-1) - { - if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1; - else nChildFormat=nFormat; + if (lpszMarker) + lpszMarker[nResult] = _CXML(' '); + nResult++; + } + pAttr++; } - // Enumerate through remaining children - for (i=0; ipOrder[i]; - switch((XMLElementType)(j&3)) - { - // Text nodes - case eNodeText: - { - // "Text" - XMLCSTR pChild=pEntry->pText[j>>2]; - cb = (int)ToXMLStringTool::lengthXMLString(pChild); - if (cb) - { - if (nFormat>=0) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1); - ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild); - lpszMarker[nResult+nFormat+1+cb]=_CXML('\n'); - } - nResult+=cb+nFormat+2; - } else - { - if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); - nResult += cb; - } - } - break; + if (pEntry->isDeclaration) { + if (lpszMarker) { + lpszMarker[nResult - 1] = _CXML('?'); + lpszMarker[nResult] = _CXML('>'); + } + nResult++; + if (nFormat != -1) { + if (lpszMarker) + lpszMarker[nResult] = _CXML('\n'); + nResult++; + } + } else + // If there are child nodes we need to terminate the start tag + if (nElementI) { + if (lpszMarker) + lpszMarker[nResult - 1] = _CXML('>'); + if (nFormat >= 0) { + if (lpszMarker) + lpszMarker[nResult] = _CXML('\n'); + nResult++; + } + } else + nResult--; + } + + // Calculate the child format for when we recurse. This is used to + // determine the number of spaces used for prefixes. 
+ if (nFormat != -1) { + if (cbElement && (!pEntry->isDeclaration)) + nChildFormat = nFormat + 1; + else + nChildFormat = nFormat; + } + + // Enumerate through remaining children + for (i = 0; i < nElementI; i++) { + j = pEntry->pOrder[i]; + switch ((XMLElementType)(j & 3)) { + // Text nodes + case eNodeText: { + // "Text" + XMLCSTR pChild = pEntry->pText[j >> 2]; + cb = (int)ToXMLStringTool::lengthXMLString(pChild); + if (cb) { + if (nFormat >= 0) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat + 1); + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult + nFormat + 1], + pChild); + lpszMarker[nResult + nFormat + 1 + cb] = _CXML('\n'); } + nResult += cb + nFormat + 2; + } else { + if (lpszMarker) + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); + nResult += cb; + } + } + break; + } + + // Clear type nodes + case eNodeClear: { + XMLClear *pChild = pEntry->pClear + (j >> 2); + // "OpenTag" + cb = (int)LENSTR(pChild->lpszOpenTag); + if (cb) { + if (nFormat != -1) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat + 1); + xstrcpy(&lpszMarker[nResult + nFormat + 1], pChild->lpszOpenTag); + } + nResult += cb + nFormat + 1; + } else { + if (lpszMarker) + xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); + nResult += cb; + } + } - // Clear type nodes - case eNodeClear: - { - XMLClear *pChild=pEntry->pClear+(j>>2); - // "OpenTag" - cb = (int)LENSTR(pChild->lpszOpenTag); - if (cb) - { - if (nFormat!=-1) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1); - xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag); - } - nResult+=cb+nFormat+1; - } - else - { - if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); - nResult += cb; - } - } - - // "OpenTag Value" - cb = (int)LENSTR(pChild->lpszValue); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue); - nResult += cb; - } - - // "OpenTag Value CloseTag" - cb = 
(int)LENSTR(pChild->lpszCloseTag); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); - nResult += cb; - } + // "OpenTag Value" + cb = (int)LENSTR(pChild->lpszValue); + if (cb) { + if (lpszMarker) + xstrcpy(&lpszMarker[nResult], pChild->lpszValue); + nResult += cb; + } - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); - nResult++; - } - break; - } + // "OpenTag Value CloseTag" + cb = (int)LENSTR(pChild->lpszCloseTag); + if (cb) { + if (lpszMarker) + xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); + nResult += cb; + } - // Element nodes - case eNodeChild: - { - // Recursively add child nodes - nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat); - break; - } - default: break; + if (nFormat != -1) { + if (lpszMarker) + lpszMarker[nResult] = _CXML('\n'); + nResult++; } + break; + } + + // Element nodes + case eNodeChild: { + // Recursively add child nodes + nResult += CreateXMLStringR(pEntry->pChild[j >> 2].d, + lpszMarker ? 
lpszMarker + nResult : 0, + nChildFormat); + break; + } + default: + break; } + } + + if ((cbElement) && (!pEntry->isDeclaration)) { + // If we have child entries we need to use long XML notation for + // closing the element - "blah blah blah" + if (nElementI) { + // "\0" + if (lpszMarker) { + if (nFormat >= 0) { + charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat); + nResult += nFormat; + } - if ((cbElement)&&(!pEntry->isDeclaration)) - { - // If we have child entries we need to use long XML notation for - // closing the element - "blah blah blah" - if (nElementI) - { - // "\0" - if (lpszMarker) - { - if (nFormat >=0) - { - charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat); - nResult+=nFormat; - } - - lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/'); - nResult += 2; - xstrcpy(&lpszMarker[nResult], pEntry->lpszName); - nResult += cbElement; - - lpszMarker[nResult]=_CXML('>'); - if (nFormat == -1) nResult++; - else - { - lpszMarker[nResult+1]=_CXML('\n'); - nResult+=2; - } - } else - { - if (nFormat>=0) nResult+=cbElement+4+nFormat; - else if (nFormat==-1) nResult+=cbElement+3; - else nResult+=cbElement+4; - } - } else - { - // If there are no children we can use shorthand XML notation - - // "" - // "/>\0" - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>'); - if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n'); - } - nResult += nFormat == -1 ? 
2 : 3; + lpszMarker[nResult] = _CXML('<'); + lpszMarker[nResult + 1] = _CXML('/'); + nResult += 2; + xstrcpy(&lpszMarker[nResult], pEntry->lpszName); + nResult += cbElement; + + lpszMarker[nResult] = _CXML('>'); + if (nFormat == -1) + nResult++; + else { + lpszMarker[nResult + 1] = _CXML('\n'); + nResult += 2; } + } else { + if (nFormat >= 0) + nResult += cbElement + 4 + nFormat; + else if (nFormat == -1) + nResult += cbElement + 3; + else + nResult += cbElement + 4; + } + } else { + // If there are no children we can use shorthand XML notation - + // "" + // "/>\0" + if (lpszMarker) { + lpszMarker[nResult] = _CXML('/'); + lpszMarker[nResult + 1] = _CXML('>'); + if (nFormat != -1) + lpszMarker[nResult + 2] = _CXML('\n'); + } + nResult += nFormat == -1 ? 2 : 3; } + } - return nResult; + return nResult; } #undef LENSTR @@ -2084,638 +2396,927 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // NULL terminator. // @return XMLSTR - Allocated XML string, you must free // this with free(). -XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const -{ - if (!d) { if (pnSize) *pnSize=0; return NULL; } - - XMLSTR lpszResult = NULL; - int cbStr; - - // Recursively Calculate the size of the XML string - if (!dropWhiteSpace) nFormat=0; - nFormat = nFormat ? 0 : -1; - cbStr = CreateXMLStringR(d, 0, nFormat); - // Alllocate memory for the XML string + the NULL terminator and - // create the recursively XML string. 
- lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR)); - CreateXMLStringR(d, lpszResult, nFormat); - lpszResult[cbStr]=_CXML('\0'); - if (pnSize) *pnSize = cbStr; - return lpszResult; -} - -int XMLNode::detachFromParent(XMLNodeData *d) -{ - XMLNode *pa=d->pParent->pChild; - int i=0; - while (((void*)(pa[i].d))!=((void*)d)) i++; - d->pParent->nChild--; - if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode)); - else { free(pa); d->pParent->pChild=NULL; } - return removeOrderElement(d->pParent,eNodeChild,i); -} - -XMLNode::~XMLNode() -{ - if (!d) return; +XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const { + if (!d) { + if (pnSize) + *pnSize = 0; + return NULL; + } + + XMLSTR lpszResult = NULL; + int cbStr; + + // Recursively Calculate the size of the XML string + if (!dropWhiteSpace) + nFormat = 0; + nFormat = nFormat ? 0 : -1; + cbStr = CreateXMLStringR(d, 0, nFormat); + // Alllocate memory for the XML string + the NULL terminator and + // create the recursively XML string. 
+ lpszResult = (XMLSTR)malloc((cbStr + 1) * sizeof(XMLCHAR)); + CreateXMLStringR(d, lpszResult, nFormat); + lpszResult[cbStr] = _CXML('\0'); + if (pnSize) + *pnSize = cbStr; + return lpszResult; +} + +int XMLNode::detachFromParent(XMLNodeData *d) { + XMLNode *pa = d->pParent->pChild; + int i = 0; + while (((void *)(pa[i].d)) != ((void *)d)) + i++; + d->pParent->nChild--; + if (d->pParent->nChild) + memmove(pa + i, pa + i + 1, (d->pParent->nChild - i) * sizeof(XMLNode)); + else { + free(pa); + d->pParent->pChild = NULL; + } + return removeOrderElement(d->pParent, eNodeChild, i); +} + +XMLNode::~XMLNode() { + if (!d) + return; + d->ref_count--; + emptyTheNode(0); +} +void XMLNode::deleteNodeContent() { + if (!d) + return; + if (d->pParent) { + detachFromParent(d); + d->pParent = NULL; d->ref_count--; - emptyTheNode(0); -} -void XMLNode::deleteNodeContent() -{ - if (!d) return; - if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; } - emptyTheNode(1); -} -void XMLNode::emptyTheNode(char force) -{ - XMLNodeData *dd=d; // warning: must stay this way! 
- if ((dd->ref_count==0)||force) - { - if (d->pParent) detachFromParent(d); - int i; - XMLNode *pc; - for(i=0; inChild; i++) - { - pc=dd->pChild+i; - pc->d->pParent=NULL; - pc->d->ref_count--; - pc->emptyTheNode(force); - } - myFree(dd->pChild); - for(i=0; inText; i++) free((void*)dd->pText[i]); - myFree(dd->pText); - for(i=0; inClear; i++) free((void*)dd->pClear[i].lpszValue); - myFree(dd->pClear); - for(i=0; inAttribute; i++) - { - free((void*)dd->pAttribute[i].lpszName); - if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue); - } - myFree(dd->pAttribute); - myFree(dd->pOrder); - myFree((void*)dd->lpszName); - dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0; - dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL; - dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL; - } - if (dd->ref_count==0) - { - free(dd); - d=NULL; - } -} - -XMLNode& XMLNode::operator=( const XMLNode& A ) -{ - // shallow copy - if (this != &A) - { - if (d) { d->ref_count--; emptyTheNode(0); } - d=A.d; - if (d) (d->ref_count) ++ ; + } + emptyTheNode(1); +} +void XMLNode::emptyTheNode(char force) { + XMLNodeData *dd = d; // warning: must stay this way! 
+ if ((dd->ref_count == 0) || force) { + if (d->pParent) + detachFromParent(d); + int i; + XMLNode *pc; + for (i = 0; i < dd->nChild; i++) { + pc = dd->pChild + i; + pc->d->pParent = NULL; + pc->d->ref_count--; + pc->emptyTheNode(force); } - return *this; -} - -XMLNode::XMLNode(const XMLNode &A) -{ - // shallow copy - d=A.d; - if (d) (d->ref_count)++ ; -} - -XMLNode XMLNode::deepCopy() const -{ - if (!d) return XMLNode::emptyXMLNode; - XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration); - XMLNodeData *p=x.d; - int n=d->nAttribute; - if (n) - { - p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute)); - while (n--) - { - p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName); - p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue); - } + myFree(dd->pChild); + for (i = 0; i < dd->nText; i++) + free((void *)dd->pText[i]); + myFree(dd->pText); + for (i = 0; i < dd->nClear; i++) + free((void *)dd->pClear[i].lpszValue); + myFree(dd->pClear); + for (i = 0; i < dd->nAttribute; i++) { + free((void *)dd->pAttribute[i].lpszName); + if (dd->pAttribute[i].lpszValue) + free((void *)dd->pAttribute[i].lpszValue); } - if (d->pOrder) - { - n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n); + myFree(dd->pAttribute); + myFree(dd->pOrder); + myFree((void *)dd->lpszName); + dd->nChild = 0; + dd->nText = 0; + dd->nClear = 0; + dd->nAttribute = 0; + dd->pChild = NULL; + dd->pText = NULL; + dd->pClear = NULL; + dd->pAttribute = NULL; + dd->pOrder = NULL; + dd->lpszName = NULL; + dd->pParent = NULL; + } + if (dd->ref_count == 0) { + free(dd); + d = NULL; + } +} + +XMLNode &XMLNode::operator=(const XMLNode &A) { + // shallow copy + if (this != &A) { + if (d) { + d->ref_count--; + emptyTheNode(0); } - n=d->nText; - if (n) - { - p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR)); - while(n--) p->pText[n]=stringDup(d->pText[n]); + d = A.d; + if (d) + (d->ref_count)++; + } + return *this; +} 
+ +XMLNode::XMLNode(const XMLNode &A) { + // shallow copy + d = A.d; + if (d) + (d->ref_count)++; +} + +XMLNode XMLNode::deepCopy() const { + if (!d) + return XMLNode::emptyXMLNode; + XMLNode x(NULL, stringDup(d->lpszName), d->isDeclaration); + XMLNodeData *p = x.d; + int n = d->nAttribute; + if (n) { + p->nAttribute = n; + p->pAttribute = (XMLAttribute *)malloc(n * sizeof(XMLAttribute)); + while (n--) { + p->pAttribute[n].lpszName = stringDup(d->pAttribute[n].lpszName); + p->pAttribute[n].lpszValue = stringDup(d->pAttribute[n].lpszValue); } - n=d->nClear; - if (n) - { - p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear)); - while (n--) - { - p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag; - p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag; - p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue); - } + } + if (d->pOrder) { + n = (d->nChild + d->nText + d->nClear) * sizeof(int); + p->pOrder = (int *)malloc(n); + memcpy(p->pOrder, d->pOrder, n); + } + n = d->nText; + if (n) { + p->nText = n; + p->pText = (XMLCSTR *)malloc(n * sizeof(XMLCSTR)); + while (n--) + p->pText[n] = stringDup(d->pText[n]); + } + n = d->nClear; + if (n) { + p->nClear = n; + p->pClear = (XMLClear *)malloc(n * sizeof(XMLClear)); + while (n--) { + p->pClear[n].lpszCloseTag = d->pClear[n].lpszCloseTag; + p->pClear[n].lpszOpenTag = d->pClear[n].lpszOpenTag; + p->pClear[n].lpszValue = stringDup(d->pClear[n].lpszValue); } - n=d->nChild; - if (n) - { - p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode)); - while (n--) - { - p->pChild[n].d=NULL; - p->pChild[n]=d->pChild[n].deepCopy(); - p->pChild[n].d->pParent=p; - } + } + n = d->nChild; + if (n) { + p->nChild = n; + p->pChild = (XMLNode *)malloc(n * sizeof(XMLNode)); + while (n--) { + p->pChild[n].d = NULL; + p->pChild[n] = d->pChild[n].deepCopy(); + p->pChild[n].d->pParent = p; } - return x; + } + return x; } -XMLNode XMLNode::addChild(XMLNode childNode, int pos) -{ - XMLNodeData *dc=childNode.d; - if ((!dc)||(!d)) return 
childNode; - if (!dc->lpszName) - { - // this is a root node: todo: correct fix - int j=pos; - while (dc->nChild) - { - addChild(dc->pChild[0],j); - if (pos>=0) j++; - } - return childNode; +XMLNode XMLNode::addChild(XMLNode childNode, int pos) { + XMLNodeData *dc = childNode.d; + if ((!dc) || (!d)) + return childNode; + if (!dc->lpszName) { + // this is a root node: todo: correct fix + int j = pos; + while (dc->nChild) { + addChild(dc->pChild[0], j); + if (pos >= 0) + j++; } - if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++; - dc->pParent=d; -// int nc=d->nChild; -// d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); - d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=dc; - d->nChild++; return childNode; + } + if (dc->pParent) { + if ((detachFromParent(dc) <= pos) && (dc->pParent == d)) + pos--; + } else + dc->ref_count++; + dc->pParent = d; + // int nc=d->nChild; + // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); + d->pChild = (XMLNode *)addToOrder(0, &pos, d->nChild, d->pChild, + sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = dc; + d->nChild++; + return childNode; +} + +void XMLNode::deleteAttribute(int i) { + if ((!d) || (i < 0) || (i >= d->nAttribute)) + return; + d->nAttribute--; + XMLAttribute *p = d->pAttribute + i; + free((void *)p->lpszName); + if (p->lpszValue) + free((void *)p->lpszValue); + if (d->nAttribute) + memmove(p, p + 1, (d->nAttribute - i) * sizeof(XMLAttribute)); + else { + free(p); + d->pAttribute = NULL; + } +} + +void XMLNode::deleteAttribute(XMLAttribute *a) { + if (a) + deleteAttribute(a->lpszName); +} +void XMLNode::deleteAttribute(XMLCSTR lpszName) { + int j = 0; + getAttribute(lpszName, &j); + if (j) + deleteAttribute(j - 1); +} + +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, int i) { + if (!d) { + if (lpszNewValue) + 
free(lpszNewValue); + if (lpszNewName) + free(lpszNewName); + return NULL; + } + if (i >= d->nAttribute) { + if (lpszNewName) + return addAttribute_WOSD(lpszNewName, lpszNewValue); + return NULL; + } + XMLAttribute *p = d->pAttribute + i; + if (p->lpszValue && p->lpszValue != lpszNewValue) + free((void *)p->lpszValue); + p->lpszValue = lpszNewValue; + if (lpszNewName && p->lpszName != lpszNewName) { + free((void *)p->lpszName); + p->lpszName = lpszNewName; + }; + return p; +} + +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + if (oldAttribute) + return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue, + (XMLSTR)newAttribute->lpszName, + oldAttribute->lpszName); + return addAttribute_WOSD((XMLSTR)newAttribute->lpszName, + (XMLSTR)newAttribute->lpszValue); +} + +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, + XMLCSTR lpszOldName) { + int j = 0; + getAttribute(lpszOldName, &j); + if (j) + return updateAttribute_WOSD(lpszNewValue, lpszNewName, j - 1); + else { + if (lpszNewName) + return addAttribute_WOSD(lpszNewName, lpszNewValue); + else + return addAttribute_WOSD(stringDup(lpszOldName), lpszNewValue); + } } -void XMLNode::deleteAttribute(int i) -{ - if ((!d)||(i<0)||(i>=d->nAttribute)) return; - d->nAttribute--; - XMLAttribute *p=d->pAttribute+i; - free((void*)p->lpszName); - if (p->lpszValue) free((void*)p->lpszValue); - if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; } +int XMLNode::indexText(XMLCSTR lpszValue) const { + if (!d) + return -1; + int i, l = d->nText; + if (!lpszValue) { + if (l) + return 0; + return -1; + } + XMLCSTR *p = d->pText; + for (i = 0; i < l; i++) + if (lpszValue == p[i]) + return i; + return -1; +} + +void XMLNode::deleteText(int i) { + if ((!d) || (i < 0) || (i >= d->nText)) + return; + d->nText--; + XMLCSTR *p = d->pText + i; + free((void *)*p); + if (d->nText) + memmove(p, p 
+ 1, (d->nText - i) * sizeof(XMLCSTR)); + else { + free(p); + d->pText = NULL; + } + removeOrderElement(d, eNodeText, i); +} + +void XMLNode::deleteText(XMLCSTR lpszValue) { + deleteText(indexText(lpszValue)); +} + +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) { + if (!d) { + if (lpszNewValue) + free(lpszNewValue); + return NULL; + } + if (i >= d->nText) + return addText_WOSD(lpszNewValue); + XMLCSTR *p = d->pText + i; + if (*p != lpszNewValue) { + free((void *)*p); + *p = lpszNewValue; + } + return lpszNewValue; +} + +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewValue) + free(lpszNewValue); + return NULL; + } + int i = indexText(lpszOldValue); + if (i >= 0) + return updateText_WOSD(lpszNewValue, i); + return addText_WOSD(lpszNewValue); +} + +void XMLNode::deleteClear(int i) { + if ((!d) || (i < 0) || (i >= d->nClear)) + return; + d->nClear--; + XMLClear *p = d->pClear + i; + free((void *)p->lpszValue); + if (d->nClear) + memmove(p, p + 1, (d->nClear - i) * sizeof(XMLClear)); + else { + free(p); + d->pClear = NULL; + } + removeOrderElement(d, eNodeClear, i); +} + +int XMLNode::indexClear(XMLCSTR lpszValue) const { + if (!d) + return -1; + int i, l = d->nClear; + if (!lpszValue) { + if (l) + return 0; + return -1; + } + XMLClear *p = d->pClear; + for (i = 0; i < l; i++) + if (lpszValue == p[i].lpszValue) + return i; + return -1; } -void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); } -void XMLNode::deleteAttribute(XMLCSTR lpszName) -{ - int j=0; - getAttribute(lpszName,&j); - if (j) deleteAttribute(j-1); +void XMLNode::deleteClear(XMLCSTR lpszValue) { + deleteClear(indexClear(lpszValue)); +} +void XMLNode::deleteClear(XMLClear *a) { + if (a) + deleteClear(a->lpszValue); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return 
NULL; } - if (i>=d->nAttribute) - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); - return NULL; - } - XMLAttribute *p=d->pAttribute+i; - if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue); - p->lpszValue=lpszNewValue; - if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; }; - return p; +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) { + if (!d) { + if (lpszNewContent) + free(lpszNewContent); + return NULL; + } + if (i >= d->nClear) + return addClear_WOSD(lpszNewContent); + XMLClear *p = d->pClear + i; + if (lpszNewContent != p->lpszValue) { + free((void *)p->lpszValue); + p->lpszValue = lpszNewContent; + } + return p; +} + +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, + XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewContent) + free(lpszNewContent); + return NULL; + } + int i = indexClear(lpszOldValue); + if (i >= 0) + return updateClear_WOSD(lpszNewContent, i); + return addClear_WOSD(lpszNewContent); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) -{ - if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName); - return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue); +XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP, XMLClear *oldP) { + if (oldP) + return updateClear_WOSD((XMLSTR)newP->lpszValue, (XMLSTR)oldP->lpszValue); + return NULL; } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName) -{ - int j=0; - getAttribute(lpszOldName,&j); - if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1); - else - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); - else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue); +int XMLNode::nChildNode(XMLCSTR name) const { + if (!d) + return 0; + int 
i, j = 0, n = d->nChild; + XMLNode *pc = d->pChild; + for (i = 0; i < n; i++) { + if (xstricmp(pc->d->lpszName, name) == 0) + j++; + pc++; + } + return j; +} + +XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const { + if (!d) + return emptyXMLNode; + int i = 0, n = d->nChild; + if (j) + i = *j; + XMLNode *pc = d->pChild + i; + for (; i < n; i++) { + if (!xstricmp(pc->d->lpszName, name)) { + if (j) + *j = i + 1; + return *pc; } + pc++; + } + return emptyXMLNode; } -int XMLNode::indexText(XMLCSTR lpszValue) const -{ - if (!d) return -1; - int i,l=d->nText; - if (!lpszValue) { if (l) return 0; return -1; } - XMLCSTR *p=d->pText; - for (i=0; i= 0) { + int i = 0; + while (j-- > 0) + getChildNode(name, &i); + return getChildNode(name, &i); + } + int i = d->nChild; + while (i--) + if (!xstricmp(name, d->pChild[i].d->lpszName)) + break; + if (i < 0) + return emptyXMLNode; + return getChildNode(i); } -void XMLNode::deleteText(int i) -{ - if ((!d)||(i<0)||(i>=d->nText)) return; - d->nText--; - XMLCSTR *p=d->pText+i; - free((void*)*p); - if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; } - removeOrderElement(d,eNodeText,i); +XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, + XMLCHAR sep) { + XMLSTR path = stringDup(_path); + XMLNode x = getChildNodeByPathNonConst(path, createMissing, sep); + if (path) + free(path); + return x; } -void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); } - -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - if (i>=d->nText) return addText_WOSD(lpszNewValue); - XMLCSTR *p=d->pText+i; - if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; } - return lpszNewValue; +XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, + XMLCHAR sep) { + if ((!path) || (!(*path))) + return *this; + XMLNode xn, xbase = *this; + XMLCHAR *tend1, sepString[2]; + 
sepString[0] = sep; + sepString[1] = 0; + tend1 = xstrstr(path, sepString); + while (tend1) { + *tend1 = 0; + xn = xbase.getChildNode(path); + if (xn.isEmpty()) { + if (createIfMissing) + xn = xbase.addChild(path); + else { + *tend1 = sep; + return XMLNode::emptyXMLNode; + } + } + *tend1 = sep; + xbase = xn; + path = tend1 + 1; + tend1 = xstrstr(path, sepString); + } + xn = xbase.getChildNode(path); + if (xn.isEmpty() && createIfMissing) + xn = xbase.addChild(path); + return xn; +} + +XMLElementPosition XMLNode::positionOfText(int i) const { + if (i >= d->nText) + i = d->nText - 1; + return findPosition(d, i, eNodeText); +} +XMLElementPosition XMLNode::positionOfClear(int i) const { + if (i >= d->nClear) + i = d->nClear - 1; + return findPosition(d, i, eNodeClear); +} +XMLElementPosition XMLNode::positionOfChildNode(int i) const { + if (i >= d->nChild) + i = d->nChild - 1; + return findPosition(d, i, eNodeChild); +} +XMLElementPosition XMLNode::positionOfText(XMLCSTR lpszValue) const { + return positionOfText(indexText(lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { + return positionOfClear(indexClear(lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { + if (a) + return positionOfClear(a->lpszValue); + return positionOfClear(); +} +XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const { + if ((!d) || (!x.d)) + return -1; + XMLNodeData *dd = x.d; + XMLNode *pc = d->pChild; + int i = d->nChild; + while (i--) + if (pc[i].d == dd) + return findPosition(d, i, eNodeChild); + return -1; +} +XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const { + if (!name) + return positionOfChildNode(count); + int j = 0; + do { + getChildNode(name, &j); + if (j < 0) + return -1; + } while (count--); + return findPosition(d, j - 1, eNodeChild); +} + +XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, XMLCSTR attributeName, + XMLCSTR attributeValue, + int *k) const { + int i = 
0, j; + if (k) + i = *k; + XMLNode x; + XMLCSTR t; + do { + x = getChildNode(name, &i); + if (!x.isEmpty()) { + if (attributeValue) { + j = 0; + do { + t = x.getAttribute(attributeName, &j); + if (t && (xstricmp(attributeValue, t) == 0)) { + if (k) + *k = i; + return x; + } + } while (t); + } else { + if (x.isAttributeSet(attributeName)) { + if (k) + *k = i; + return x; + } + } + } + } while (!x.isEmpty()); + return emptyXMLNode; } -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - int i=indexText(lpszOldValue); - if (i>=0) return updateText_WOSD(lpszNewValue,i); - return addText_WOSD(lpszNewValue); +// Find an attribute on an node. +XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const { + if (!d) + return NULL; + int i = 0, n = d->nAttribute; + if (j) + i = *j; + XMLAttribute *pAttr = d->pAttribute + i; + for (; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { + if (j) + *j = i + 1; + return pAttr->lpszValue; + } + pAttr++; + } + return NULL; } -void XMLNode::deleteClear(int i) -{ - if ((!d)||(i<0)||(i>=d->nClear)) return; - d->nClear--; - XMLClear *p=d->pClear+i; - free((void*)p->lpszValue); - if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; } - removeOrderElement(d,eNodeClear,i); +char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const { + if (!d) + return FALSE; + int i, n = d->nAttribute; + XMLAttribute *pAttr = d->pAttribute; + for (i = 0; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { + return TRUE; + } + pAttr++; + } + return FALSE; } -int XMLNode::indexClear(XMLCSTR lpszValue) const -{ - if (!d) return -1; - int i,l=d->nClear; - if (!lpszValue) { if (l) return 0; return -1; } - XMLClear *p=d->pClear; - for (i=0; i 0) + getAttribute(name, &i); + return getAttribute(name, &i); } -void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); } -void 
XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); } - -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - if (i>=d->nClear) return addClear_WOSD(lpszNewContent); - XMLClear *p=d->pClear+i; - if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; } - return p; +XMLNodeContents XMLNode::enumContents(int i) const { + XMLNodeContents c; + if (!d) { + c.etype = eNodeNULL; + return c; + } + if (i < d->nAttribute) { + c.etype = eNodeAttribute; + c.attrib = d->pAttribute[i]; + return c; + } + i -= d->nAttribute; + c.etype = (XMLElementType)(d->pOrder[i] & 3); + i = (d->pOrder[i]) >> 2; + switch (c.etype) { + case eNodeChild: + c.child = d->pChild[i]; + break; + case eNodeText: + c.text = d->pText[i]; + break; + case eNodeClear: + c.clear = d->pClear[i]; + break; + default: + break; + } + return c; +} + +XMLCSTR XMLNode::getName() const { + if (!d) + return NULL; + return d->lpszName; } - -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - int i=indexClear(lpszOldValue); - if (i>=0) return updateClear_WOSD(lpszNewContent,i); - return addClear_WOSD(lpszNewContent); +int XMLNode::nText() const { + if (!d) + return 0; + return d->nText; } - -XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP) -{ - if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue); +int XMLNode::nChildNode() const { + if (!d) + return 0; + return d->nChild; +} +int XMLNode::nAttribute() const { + if (!d) + return 0; + return d->nAttribute; +} +int XMLNode::nClear() const { + if (!d) + return 0; + return d->nClear; +} +int XMLNode::nElement() const { + if (!d) + return 0; + return d->nAttribute + d->nChild + d->nText + d->nClear; +} +XMLClear XMLNode::getClear(int i) const { + if ((!d) || (i >= d->nClear)) + return 
emptyXMLClear; + return d->pClear[i]; +} +XMLAttribute XMLNode::getAttribute(int i) const { + if ((!d) || (i >= d->nAttribute)) + return emptyXMLAttribute; + return d->pAttribute[i]; +} +XMLCSTR XMLNode::getAttributeName(int i) const { + if ((!d) || (i >= d->nAttribute)) return NULL; + return d->pAttribute[i].lpszName; } - -int XMLNode::nChildNode(XMLCSTR name) const -{ - if (!d) return 0; - int i,j=0,n=d->nChild; - XMLNode *pc=d->pChild; - for (i=0; id->lpszName, name)==0) j++; - pc++; - } - return j; +XMLCSTR XMLNode::getAttributeValue(int i) const { + if ((!d) || (i >= d->nAttribute)) + return NULL; + return d->pAttribute[i].lpszValue; } - -XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const -{ - if (!d) return emptyXMLNode; - int i=0,n=d->nChild; - if (j) i=*j; - XMLNode *pc=d->pChild+i; - for (; id->lpszName, name)) - { - if (j) *j=i+1; - return *pc; - } - pc++; - } +XMLCSTR XMLNode::getText(int i) const { + if ((!d) || (i >= d->nText)) + return NULL; + return d->pText[i]; +} +XMLNode XMLNode::getChildNode(int i) const { + if ((!d) || (i >= d->nChild)) return emptyXMLNode; + return d->pChild[i]; } - -XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const -{ - if (!d) return emptyXMLNode; - if (j>=0) - { - int i=0; - while (j-->0) getChildNode(name,&i); - return getChildNode(name,&i); - } - int i=d->nChild; - while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break; - if (i<0) return emptyXMLNode; - return getChildNode(i); -} - -XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) -{ - XMLSTR path=stringDup(_path); - XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep); - if (path) free(path); - return x; -} - -XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep) -{ - if ((!path)||(!(*path))) return *this; - XMLNode xn,xbase=*this; - XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0; - tend1=xstrstr(path,sepString); - while(tend1) - { - *tend1=0; - 
xn=xbase.getChildNode(path); - if (xn.isEmpty()) - { - if (createIfMissing) xn=xbase.addChild(path); - else { *tend1=sep; return XMLNode::emptyXMLNode; } - } - *tend1=sep; - xbase=xn; - path=tend1+1; - tend1=xstrstr(path,sepString); - } - xn=xbase.getChildNode(path); - if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path); - return xn; -} - -XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); } -XMLElementPosition XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); } -XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); } -XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); } -XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const -{ - if ((!d)||(!x.d)) return -1; - XMLNodeData *dd=x.d; - XMLNode *pc=d->pChild; - int i=d->nChild; - while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild); - return -1; +XMLNode XMLNode::getParentNode() const { + if ((!d) || (!d->pParent)) + return emptyXMLNode; + return XMLNode(d->pParent); } -XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const -{ - if (!name) return positionOfChildNode(count); - int j=0; - do { getChildNode(name,&j); if (j<0) return -1; } while (count--); - return findPosition(d,j-1,eNodeChild); +char XMLNode::isDeclaration() const { + if (!d) + return 0; + return d->isDeclaration; } +char XMLNode::isEmpty() const { return (d == NULL); } +XMLNode XMLNode::emptyNode() { return XMLNode::emptyXMLNode; } -XMLNode 
XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const -{ - int i=0,j; - if (k) i=*k; - XMLNode x; - XMLCSTR t; - do - { - x=getChildNode(name,&i); - if (!x.isEmpty()) - { - if (attributeValue) - { - j=0; - do - { - t=x.getAttribute(attributeName,&j); - if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; } - } while (t); - } else - { - if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; } - } - } - } while (!x.isEmpty()); - return emptyXMLNode; +XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, stringDup(lpszName), isDeclaration, pos); } - -// Find an attribute on an node. -XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const -{ - if (!d) return NULL; - int i=0,n=d->nAttribute; - if (j) i=*j; - XMLAttribute *pAttr=d->pAttribute+i; - for (; ilpszName, lpszAttrib)==0) - { - if (j) *j=i+1; - return pAttr->lpszValue; - } - pAttr++; - } - return NULL; +XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, lpszName, isDeclaration, pos); } - -char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const -{ - if (!d) return FALSE; - int i,n=d->nAttribute; - XMLAttribute *pAttr=d->pAttribute; - for (i=0; ilpszName, lpszAttrib)==0) - { - return TRUE; - } - pAttr++; - } - return FALSE; +XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) { + return addAttribute_priv(0, stringDup(lpszName), stringDup(lpszValue)); } - -XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const -{ - if (!d) return NULL; - int i=0; - while (j-->0) getAttribute(name,&i); - return getAttribute(name,&i); +XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) { + return addAttribute_priv(0, lpszName, lpszValuev); } - -XMLNodeContents XMLNode::enumContents(int i) const -{ - XMLNodeContents c; - if (!d) { c.etype=eNodeNULL; return c; } - if 
(inAttribute) - { - c.etype=eNodeAttribute; - c.attrib=d->pAttribute[i]; - return c; - } - i-=d->nAttribute; - c.etype=(XMLElementType)(d->pOrder[i]&3); - i=(d->pOrder[i])>>2; - switch (c.etype) - { - case eNodeChild: c.child = d->pChild[i]; break; - case eNodeText: c.text = d->pText[i]; break; - case eNodeClear: c.clear = d->pClear[i]; break; - default: break; - } - return c; +XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, stringDup(lpszValue), pos); +} +XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, lpszValue, pos); +} +XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, stringDup(lpszValue), lpszOpen, lpszClose, pos); +} +XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, lpszValue, lpszOpen, lpszClose, pos); +} +XMLCSTR XMLNode::updateName(XMLCSTR lpszName) { + return updateName_WOSD(stringDup(lpszName)); +} +XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + return updateAttribute_WOSD(stringDup(newAttribute->lpszValue), + stringDup(newAttribute->lpszName), + oldAttribute->lpszName); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, int i) { + return updateAttribute_WOSD(stringDup(lpszNewValue), stringDup(lpszNewName), + i); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, + XMLCSTR lpszOldName) { + return updateAttribute_WOSD(stringDup(lpszNewValue), stringDup(lpszNewName), + lpszOldName); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) { + return updateText_WOSD(stringDup(lpszNewValue), i); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateText_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear 
*XMLNode::updateClear(XMLCSTR lpszNewContent, int i) { + return updateClear_WOSD(stringDup(lpszNewContent), i); +} +XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateClear_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear *XMLNode::updateClear(XMLClear *newP, XMLClear *oldP) { + return updateClear_WOSD(stringDup(newP->lpszValue), oldP->lpszValue); } -XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; } -int XMLNode::nText() const { if (!d) return 0; return d->nText; } -int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; } -int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; } -int XMLNode::nClear() const { if (!d) return 0; return d->nClear; } -int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; } -XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; } -XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; } -XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; } -XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; } -XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; } -XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; } -XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); } -char XMLNode::isDeclaration ( ) const { if (!d) return 0; return d->isDeclaration; } -char XMLNode::isEmpty ( ) const { return (d==NULL); } -XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; } - -XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, 
XMLElementPosition pos) - { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); } -XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos) - { return addChild_priv(0,lpszName,isDeclaration,pos); } -XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) - { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); } -XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) - { return addAttribute_priv(0,lpszName,lpszValuev); } -XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,stringDup(lpszValue),pos); } -XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,lpszValue,pos); } -XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); } -XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); } -XMLCSTR XMLNode::updateName(XMLCSTR lpszName) - { return updateName_WOSD(stringDup(lpszName)); } -XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) - { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); } -XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) - { return updateText_WOSD(stringDup(lpszNewValue),i); } -XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR 
lpszOldValue) - { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) - { return updateClear_WOSD(stringDup(lpszNewContent),i); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) - { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP) - { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); } - -char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars, - char _dropWhiteSpace, char _removeCommentsInMiddleOfText) -{ - guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText; +char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, + char _guessWideCharChars, char _dropWhiteSpace, + char _removeCommentsInMiddleOfText) { + guessWideCharChars = _guessWideCharChars; + dropWhiteSpace = _dropWhiteSpace; + removeCommentsInMiddleOfText = _removeCommentsInMiddleOfText; #ifdef _XMLWIDECHAR - if (_characterEncoding) characterEncoding=_characterEncoding; + if (_characterEncoding) + characterEncoding = _characterEncoding; #else - switch(_characterEncoding) - { - case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break; - case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break; - case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break; - case char_encoding_GB2312: characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break; + switch (_characterEncoding) { + case char_encoding_UTF8: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_utf8ByteTable; + break; + case char_encoding_legacy: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_legacyByteTable; + break; + case 
char_encoding_ShiftJIS: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_sjisByteTable; + break; + case char_encoding_GB2312: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gb2312ByteTable; + break; case char_encoding_Big5: - case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break; - default: return 1; - } + case char_encoding_GBK: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gbk_big5_ByteTable; + break; + default: + return 1; + } #endif - return 0; + return 0; } -XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute) -{ +XMLNode::XMLCharEncoding +XMLNode::guessCharEncoding(void *buf, int l, char useXMLEncodingAttribute) { #ifdef _XMLWIDECHAR - return (XMLCharEncoding)0; + return (XMLCharEncoding)0; #else - if (l<25) return (XMLCharEncoding)0; - if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0; - unsigned char *b=(unsigned char*)buf; - if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8; - - // Match utf-8 model ? 
- XMLCharEncoding bestGuess=char_encoding_UTF8; - int i=0; - while (i>2 ]; - *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F]; - *(curr++)=base64Fillchar; - *(curr++)=base64Fillchar; - } else if (eLen==2) - { - j=(inbuf[0]<<8)|inbuf[1]; - *(curr++)=base64EncodeTable[ j>>10 ]; - *(curr++)=base64EncodeTable[(j>> 4)&0x3f]; - *(curr++)=base64EncodeTable[(j<< 2)&0x3f]; - *(curr++)=base64Fillchar; +void XMLParserBase64Tool::freeBuffer() { + if (buf) + free(buf); + buf = NULL; + buflen = 0; +} + +int XMLParserBase64Tool::encodeLength(int inlen, char formatted) { + unsigned int i = ((inlen - 1) / 3 * 4 + 4 + 1); + if (formatted) + i += inlen / 54; + return i; +} + +XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, + char formatted) { + int i = encodeLength(inlen, formatted), k = 17, eLen = inlen / 3, j; + alloc(i * sizeof(XMLCHAR)); + XMLSTR curr = (XMLSTR)buf; + for (i = 0; i < eLen; i++) { + // Copy next three bytes into lower 24 bits of int, paying attention to + // sign. + j = (inbuf[0] << 16) | (inbuf[1] << 8) | inbuf[2]; + inbuf += 3; + // Encode the int into four chars + *(curr++) = base64EncodeTable[j >> 18]; + *(curr++) = base64EncodeTable[(j >> 12) & 0x3f]; + *(curr++) = base64EncodeTable[(j >> 6) & 0x3f]; + *(curr++) = base64EncodeTable[(j)&0x3f]; + if (formatted) { + if (!k) { + *(curr++) = _CXML('\n'); + k = 18; + } + k--; } - *(curr++)=0; - return (XMLSTR)buf; -} - -unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int size=0; - unsigned char c; - //skip any extra characters (e.g. newlines or spaces) - while (*data) - { + } + eLen = inlen - eLen * 3; // 0 - 2. 
+ if (eLen == 1) { + *(curr++) = base64EncodeTable[inbuf[0] >> 2]; + *(curr++) = base64EncodeTable[(inbuf[0] << 4) & 0x3F]; + *(curr++) = base64Fillchar; + *(curr++) = base64Fillchar; + } else if (eLen == 2) { + j = (inbuf[0] << 8) | inbuf[1]; + *(curr++) = base64EncodeTable[j >> 10]; + *(curr++) = base64EncodeTable[(j >> 4) & 0x3f]; + *(curr++) = base64EncodeTable[(j << 2) & 0x3f]; + *(curr++) = base64Fillchar; + } + *(curr++) = 0; + return (XMLSTR)buf; +} + +unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data, XMLError *xe) { + if (xe) + *xe = eXMLErrorNone; + int size = 0; + unsigned char c; + // skip any extra characters (e.g. newlines or spaces) + while (*data) { #ifdef _XMLWIDECHAR - if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } + if (*data > 255) { + if (xe) + *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; + } #endif - c=base64DecodeTable[(unsigned char)(*data)]; - if (c<97) size++; - else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } - data++; + c = base64DecodeTable[(unsigned char)(*data)]; + if (c < 97) + size++; + else if (c == 98) { + if (xe) + *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; } - if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4; - if (size==0) return 0; - do { data--; size--; } while(*data==base64Fillchar); size++; - return (unsigned int)((size*3)/4); -} - -unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int i=0,p=0; - unsigned char d,c; - for(;;) - { + data++; + } + if (xe && (size % 4 != 0)) + *xe = eXMLErrorBase64DataSizeIsNotMultipleOf4; + if (size == 0) + return 0; + do { + data--; + size--; + } while (*data == base64Fillchar); + size++; + return (unsigned int)((size * 3) / 4); +} + +unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, + int len, XMLError *xe) { + if (xe) + *xe = eXMLErrorNone; + int i = 0, p = 0; + 
unsigned char d, c; + for (;;) { #ifdef _XMLWIDECHAR -#define BASE64DECODE_READ_NEXT_CHAR(c) \ - do { \ - if (data[i]>255){ c=98; break; } \ - c=base64DecodeTable[(unsigned char)data[i++]]; \ - }while (c==97); \ - if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } +#define BASE64DECODE_READ_NEXT_CHAR(c) \ + do { \ + if (data[i] > 255) { \ + c = 98; \ + break; \ + } \ + c = base64DecodeTable[(unsigned char)data[i++]]; \ + } while (c == 97); \ + if (c == 98) { \ + if (xe) \ + *xe = eXMLErrorBase64DecodeIllegalCharacter; \ + return 0; \ + } #else -#define BASE64DECODE_READ_NEXT_CHAR(c) \ - do { c=base64DecodeTable[(unsigned char)data[i++]]; }while (c==97); \ - if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } +#define BASE64DECODE_READ_NEXT_CHAR(c) \ + do { \ + c = base64DecodeTable[(unsigned char)data[i++]]; \ + } while (c == 97); \ + if (c == 98) { \ + if (xe) \ + *xe = eXMLErrorBase64DecodeIllegalCharacter; \ + return 0; \ + } #endif - BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { return 2; } - if (c==96) - { - if (p==(int)len) return 2; - if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; - return 1; - } + BASE64DECODE_READ_NEXT_CHAR(c) + if (c == 99) { + return 2; + } + if (c == 96) { + if (p == (int)len) + return 2; + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } - BASE64DECODE_READ_NEXT_CHAR(d) - if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; } - buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3)); - - BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (c==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; - return 0; - } - if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf)); - - BASE64DECODE_READ_NEXT_CHAR(d) - if (d==99 
) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (d==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; - return 0; - } - if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((c<<6)&0xc0)|d); + BASE64DECODE_READ_NEXT_CHAR(d) + if ((d == 99) || (d == 96)) { + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; } -} -#undef BASE64DECODE_READ_NEXT_CHAR + if (p == (int)len) { + if (xe) + *xe = eXMLErrorBase64DecodeBufferTooSmall; + return 0; + } + buf[p++] = (unsigned char)((c << 2) | ((d >> 4) & 0x3)); -void XMLParserBase64Tool::alloc(int newsize) -{ - if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; } - if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } -} + BASE64DECODE_READ_NEXT_CHAR(c) + if (c == 99) { + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (c == 96) + return 2; + if (xe) + *xe = eXMLErrorBase64DecodeBufferTooSmall; + return 0; + } + if (c == 96) { + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((d << 4) & 0xf0) | ((c >> 2) & 0xf)); -unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - unsigned int len=decodeSize(data,xe); - if (outlen) *outlen=len; - if (!len) return NULL; - alloc(len+1); - if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; } - return (unsigned char*)buf; + BASE64DECODE_READ_NEXT_CHAR(d) + if (d == 99) { + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (d == 96) + return 2; + if (xe) + *xe = eXMLErrorBase64DecodeBufferTooSmall; + return 0; + } + if (d == 96) { + if (xe) + *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((c << 6) & 0xc0) | d); + } } +#undef BASE64DECODE_READ_NEXT_CHAR +void XMLParserBase64Tool::alloc(int newsize) 
{ + if ((!buf) && (newsize)) { + buf = malloc(newsize); + buflen = newsize; + return; + } + if (newsize > buflen) { + buf = realloc(buf, newsize); + buflen = newsize; + } +} + +unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, + XMLError *xe) { + if (xe) + *xe = eXMLErrorNone; + unsigned int len = decodeSize(data, xe); + if (outlen) + *outlen = len; + if (!len) + return NULL; + alloc(len + 1); + if (!decode(data, (unsigned char *)buf, len, xe)) { + return NULL; + } + return (unsigned char *)buf; +} diff --git a/xmlParser.h b/xmlParser.h index 068b36f..fe875fa 100644 --- a/xmlParser.h +++ b/xmlParser.h @@ -9,18 +9,18 @@ * @version V2.41 * @author Frank Vanden Berghen * - * The following license terms for the "XMLParser library from Business-Insight" apply to projects - * that are in some way related to - * the "mcpat project", including applications - * using "mcpat project" and tools developed - * for enhancing "mcpat project". All other projects - * (not related to "mcpat project") have to use the "XMLParser library from Business-Insight" - * code under the Aladdin Free Public License (AFPL) - * See the file "AFPL-license.txt" for more informations about the AFPL license. - * (see http://www.artifex.com/downloads/doc/Public.htm for detailed AFPL terms) + * The following license terms for the "XMLParser library from Business-Insight" + *apply to projects that are in some way related to the "mcpat project", + *including applications using "mcpat project" and tools developed for enhancing + *"mcpat project". All other projects (not related to "mcpat project") have to + *use the "XMLParser library from Business-Insight" code under the Aladdin Free + *Public License (AFPL) See the file "AFPL-license.txt" for more informations + *about the AFPL license. 
(see http://www.artifex.com/downloads/doc/Public.htm + *for detailed AFPL terms) * - * Redistribution and use of the "XMLParser library from Business-Insight" in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * Redistribution and use of the "XMLParser library from Business-Insight" in + *source and binary forms, with or without modification, are permitted provided + *that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright @@ -46,71 +46,81 @@ * All rights reserved. * * \section tutorial First Tutorial - * You can follow a simple Tutorial to know the basics... + * You can follow a simple Tutorial to know + *the basics... * - * \section usage General usage: How to include the XMLParser library inside your project. + * \section usage General usage: How to include the XMLParser library inside + *your project. * - * The library is composed of two files: xmlParser.cpp and - * xmlParser.h. These are the ONLY 2 files that you need when - * using the library inside your own projects. + * The library is composed of two files: xmlParser.cpp and xmlParser.h. These are the ONLY 2 files that you + *need when using the library inside your own projects. * - * All the functions of the library are documented inside the comments of the file - * xmlParser.h. These comments can be transformed in - * full-fledged HTML documentation using the DOXYGEN software: simply type: "doxygen doxy.cfg" + * All the functions of the library are documented inside the comments of the + *file xmlParser.h. 
These comments can be + *transformed in full-fledged HTML documentation using the DOXYGEN software: + *simply type: "doxygen doxy.cfg" * - * By default, the XMLParser library uses (char*) for string representation.To use the (wchar_t*) - * version of the library, you need to define the "_UNICODE" preprocessor definition variable - * (this is usually done inside your project definition file) (This is done automatically for you - * when using Visual Studio). + * By default, the XMLParser library uses (char*) for string representation.To + *use the (wchar_t*) version of the library, you need to define the "_UNICODE" + *preprocessor definition variable (this is usually done inside your project + *definition file) (This is done automatically for you when using Visual + *Studio). * * \section example Advanced Tutorial and Many Examples of usage. * * Some very small introductory examples are described inside the Tutorial file * xmlParser.html * - * Some additional small examples are also inside the file xmlTest.cpp - * (for the "char*" version of the library) and inside the file - * xmlTestUnicode.cpp (for the "wchar_t*" - * version of the library). If you have a question, please review these additionnal examples - * before sending an e-mail to the author. + * Some additional small examples are also inside the file xmlTest.cpp (for the "char*" version of the + *library) and inside the file xmlTestUnicode.cpp (for the "wchar_t*" + * version of the library). If you have a question, please review these + *additionnal examples before sending an e-mail to the author. 
* * To build the examples: * - linux/unix: type "make" * - solaris: type "make -f makefile.solaris" * - windows: Visual Studio: double-click on xmlParser.dsw - * (under Visual Studio .NET, the .dsp and .dsw files will be automatically converted to .vcproj and .sln files) + * (under Visual Studio .NET, the .dsp and .dsw files will be automatically + *converted to .vcproj and .sln files) * * In order to build the examples you need some additional files: * - linux/unix: makefile * - solaris: makefile.solaris - * - windows: Visual Studio: *.dsp, xmlParser.dsw and also xmlParser.lib and xmlParser.dll + * - windows: Visual Studio: *.dsp, xmlParser.dsw and also xmlParser.lib and + *xmlParser.dll * * \section debugging Debugging with the XMLParser library * * \subsection debugwin Debugging under WINDOWS * - * Inside Visual C++, the "debug versions" of the memory allocation functions are - * very slow: Do not forget to compile in "release mode" to get maximum speed. - * When I had to debug a software that was using the XMLParser Library, it was usually - * a nightmare because the library was sooOOOoooo slow in debug mode (because of the - * slow memory allocations in Debug mode). To solve this - * problem, during all the debugging session, I am now using a very fast DLL version of the - * XMLParser Library (the DLL is compiled in release mode). Using the DLL version of - * the XMLParser Library allows me to have lightening XML parsing speed even in debug! - * Other than that, the DLL version is useless: In the release version of my tool, - * I always use the normal, ".cpp"-based, XMLParser Library (I simply include the - * xmlParser.cpp and - * xmlParser.h files into the project). + * Inside Visual C++, the "debug versions" of the memory allocation + *functions are very slow: Do not forget to compile in "release mode" to get + *maximum speed. 
When I had to debug a software that was using the XMLParser + *Library, it was usually a nightmare because the library was sooOOOoooo slow in + *debug mode (because of the slow memory allocations in Debug mode). To solve + *this problem, during all the debugging session, I am now using a very fast DLL + *version of the XMLParser Library (the DLL is compiled in release mode). Using + *the DLL version of the XMLParser Library allows me to have lightening XML + *parsing speed even in debug! Other than that, the DLL version is useless: In + *the release version of my tool, I always use the normal, ".cpp"-based, + *XMLParser Library (I simply include the xmlParser.cpp and xmlParser.h files into the project). * - * The file XMLNodeAutoexp.txt contains some - * "tweaks" that improve substancially the display of the content of the XMLNode objects - * inside the Visual Studio Debugger. Believe me, once you have seen inside the debugger - * the "smooth" display of the XMLNode objects, you cannot live without it anymore! + * The file XMLNodeAutoexp.txt + *contains some "tweaks" that improve substancially the display of the content + *of the XMLNode objects inside the Visual Studio Debugger. Believe me, once you + *have seen inside the debugger the "smooth" display of the XMLNode objects, you + *cannot live without it anymore! * * \subsection debuglinux Debugging under LINUX/UNIX * - * The speed of the debug version of the XMLParser library is tolerable so no extra - * work.has been done. + * The speed of the debug version of the XMLParser library is tolerable so + *no extra work.has been done. * ****************************************************************************/ @@ -120,16 +130,21 @@ #include #ifdef _UNICODE -// If you comment the next "define" line then the library will never "switch to" _UNICODE (wchar_t*) mode (16/32 bits per characters). 
-// This is useful when you get error messages like: -// 'XMLNode::openFileHelper' : cannot convert parameter 2 from 'const char [5]' to 'const wchar_t *' -// The _XMLWIDECHAR preprocessor variable force the XMLParser library into either utf16/32-mode (the proprocessor variable -// must be defined) or utf8-mode(the pre-processor variable must be undefined). +// If you comment the next "define" line then the library will never "switch to" +// _UNICODE (wchar_t*) mode (16/32 bits per characters). This is useful when you +// get error messages like: +// 'XMLNode::openFileHelper' : cannot convert parameter 2 from 'const char +// [5]' to 'const wchar_t *' +// The _XMLWIDECHAR preprocessor variable force the XMLParser library into +// either utf16/32-mode (the proprocessor variable must be defined) or +// utf8-mode(the pre-processor variable must be undefined). #define _XMLWIDECHAR #endif -#if defined(WIN32) || defined(UNDER_CE) || defined(_WIN32) || defined(WIN64) || defined(__BORLANDC__) -// comment the next line if you are under windows and the compiler is not Microsoft Visual Studio (6.0 or .NET) or Borland +#if defined(WIN32) || defined(UNDER_CE) || defined(_WIN32) || \ + defined(WIN64) || defined(__BORLANDC__) +// comment the next line if you are under windows and the compiler is not +// Microsoft Visual Studio (6.0 or .NET) or Borland #define _XMLWINDOWS #endif @@ -146,7 +161,8 @@ #define XMLDLLENTRY #endif -// uncomment the next line if you want no support for wchar_t* (no need for the or libraries anymore to compile) +// uncomment the next line if you want no support for wchar_t* (no need for the +// or libraries anymore to compile) //#define XML_NO_WIDE_CHAR #ifdef XML_NO_WIDE_CHAR @@ -166,597 +182,788 @@ // Some common types for char set portable code #ifdef _XMLWIDECHAR - #define _CXML(c) L ## c - #define XMLCSTR const wchar_t * - #define XMLSTR wchar_t * - #define XMLCHAR wchar_t +#define _CXML(c) L##c +#define XMLCSTR const wchar_t * +#define XMLSTR wchar_t 
* +#define XMLCHAR wchar_t #else - #define _CXML(c) c - #define XMLCSTR const char * - #define XMLSTR char * - #define XMLCHAR char +#define _CXML(c) c +#define XMLCSTR const char * +#define XMLSTR char * +#define XMLCHAR char #endif #ifndef FALSE - #define FALSE 0 +#define FALSE 0 #endif /* FALSE */ #ifndef TRUE - #define TRUE 1 +#define TRUE 1 #endif /* TRUE */ - /// Enumeration for XML parse errors. -typedef enum XMLError -{ - eXMLErrorNone = 0, - eXMLErrorMissingEndTag, - eXMLErrorNoXMLTagFound, - eXMLErrorEmpty, - eXMLErrorMissingTagName, - eXMLErrorMissingEndTagName, - eXMLErrorUnmatchedEndTag, - eXMLErrorUnmatchedEndClearTag, - eXMLErrorUnexpectedToken, - eXMLErrorNoElements, - eXMLErrorFileNotFound, - eXMLErrorFirstTagNotFound, - eXMLErrorUnknownCharacterEntity, - eXMLErrorCharacterCodeAbove255, - eXMLErrorCharConversionError, - eXMLErrorCannotOpenWriteFile, - eXMLErrorCannotWriteFile, - - eXMLErrorBase64DataSizeIsNotMultipleOf4, - eXMLErrorBase64DecodeIllegalCharacter, - eXMLErrorBase64DecodeTruncatedData, - eXMLErrorBase64DecodeBufferTooSmall +typedef enum XMLError { + eXMLErrorNone = 0, + eXMLErrorMissingEndTag, + eXMLErrorNoXMLTagFound, + eXMLErrorEmpty, + eXMLErrorMissingTagName, + eXMLErrorMissingEndTagName, + eXMLErrorUnmatchedEndTag, + eXMLErrorUnmatchedEndClearTag, + eXMLErrorUnexpectedToken, + eXMLErrorNoElements, + eXMLErrorFileNotFound, + eXMLErrorFirstTagNotFound, + eXMLErrorUnknownCharacterEntity, + eXMLErrorCharacterCodeAbove255, + eXMLErrorCharConversionError, + eXMLErrorCannotOpenWriteFile, + eXMLErrorCannotWriteFile, + + eXMLErrorBase64DataSizeIsNotMultipleOf4, + eXMLErrorBase64DecodeIllegalCharacter, + eXMLErrorBase64DecodeTruncatedData, + eXMLErrorBase64DecodeBufferTooSmall } XMLError; - -/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents -typedef enum XMLElementType -{ - eNodeChild=0, - eNodeAttribute=1, - eNodeText=2, - eNodeClear=3, - eNodeNULL=4 +/// Enumeration used to manage type of data. 
Use in conjunction with structure +/// XMLNodeContents +typedef enum XMLElementType { + eNodeChild = 0, + eNodeAttribute = 1, + eNodeText = 2, + eNodeClear = 3, + eNodeNULL = 4 } XMLElementType; /// Structure used to obtain error details if the parse fails. -typedef struct XMLResults -{ - enum XMLError error; - int nLine,nColumn; +typedef struct XMLResults { + enum XMLError error; + int nLine, nColumn; } XMLResults; /// Structure for XML clear (unformatted) node (usually comments) typedef struct XMLClear { - XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag; + XMLCSTR lpszValue; + XMLCSTR lpszOpenTag; + XMLCSTR lpszCloseTag; } XMLClear; /// Structure for XML attribute. typedef struct XMLAttribute { - XMLCSTR lpszName; XMLCSTR lpszValue; + XMLCSTR lpszName; + XMLCSTR lpszValue; } XMLAttribute; /// XMLElementPosition are not interchangeable with simple indexes typedef int XMLElementPosition; struct XMLNodeContents; - + /** @defgroup XMLParserGeneral The XML parser */ /// Main Class representing a XML node /** * All operations are performed using this class. - * \note The constructors of the XMLNode class are protected, so use instead one of these four methods to get your first instance of XMLNode: - *
    - *
  • XMLNode::parseString
  • - *
  • XMLNode::parseFile
  • - *
  • XMLNode::openFileHelper
  • - *
  • XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)
  • + * \note The constructors of the XMLNode class are protected, so use instead one + * of these four methods to get your first instance of XMLNode:
    • + * XMLNode::parseString
    • XMLNode::parseFile
    • + * XMLNode::openFileHelper
    • XMLNode::createXMLTopNode (or + * XMLNode::createXMLTopNode_WOSD)
    • *
    */ -typedef struct XMLDLLENTRY XMLNode -{ - private: - - struct XMLNodeDataTag; - - /// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode - XMLNode(struct XMLNodeDataTag *pParent, XMLSTR lpszName, char isDeclaration); - /// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode - XMLNode(struct XMLNodeDataTag *p); - - public: - static XMLCSTR getVersion();///< Return the XMLParser library version number - - /** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string. - * @ingroup XMLParserGeneral - * @{ */ - - /// Parse an XML string and return the root of a XMLNode tree representing the string. - static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL); - /**< The "parseString" function parse an XML string and return the root of a XMLNode tree. The "opposite" of this function is - * the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the - * "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error. - * If you still want to parse the file, you can use the APPROXIMATE_PARSING option as explained inside the note at the - * beginning of the "xmlParser.cpp" file. - * - * @param lpXMLString the XML string to parse - * @param tag the name of the first tag inside the XML file. If the tag parameter is omitted, this function returns a node that represents the head of the xml document including the declaration term (). - * @param pResults a pointer to a XMLResults variable that will contain some information that can be used to trace the XML parsing error. You can have a user-friendly explanation of the parsing error with the "getError" function. 
- */ - - /// Parse an XML file and return the root of a XMLNode tree representing the file. - static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL); - /**< The "parseFile" function parse an XML file and return the root of a XMLNode tree. The "opposite" of this function is - * the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the - * "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error. - * If you still want to parse the file, you can use the APPROXIMATE_PARSING option as explained inside the note at the - * beginning of the "xmlParser.cpp" file. - * - * @param filename the path to the XML file to parse - * @param tag the name of the first tag inside the XML file. If the tag parameter is omitted, this function returns a node that represents the head of the xml document including the declaration term (). - * @param pResults a pointer to a XMLResults variable that will contain some information that can be used to trace the XML parsing error. You can have a user-friendly explanation of the parsing error with the "getError" function. - */ - - /// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made. - static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL); - /**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file. - * This function also tries to guess char Encoding (UTF-8, ASCII or SHIT-JIS) based on the first 200 bytes of the file. 
Since each - * application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files - * and program yourself thereafter an "error reporting" tailored for your needs (instead of using the very crude "error reporting" - * mechanism included inside the "openFileHelper" function). - * - * If the XML document is corrupted, the "openFileHelper" method will: - * - display an error message on the console (or inside a messageBox for windows). - * - stop execution (exit). - * - * I strongly suggest that you write your own "openFileHelper" method tailored to your needs. If you still want to parse - * the file, you can use the APPROXIMATE_PARSING option as explained inside the note at the beginning of the "xmlParser.cpp" file. - * - * @param filename the path of the XML file to parse. - * @param tag the name of the first tag inside the XML file. If the tag parameter is omitted, this function returns a node that represents the head of the xml document including the declaration term (). - */ - - static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error - - /// Create an XML string starting from the current XMLNode. - XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const; - /**< The returned string should be free'd using the "freeXMLString" function. - * - * If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element - * with appropriate white spaces and carriage returns. if pnSize is given it returns the size in character of the string. */ - - /// Save the content of an xmlNode inside a file - XMLError writeToFile(XMLCSTR filename, - const char *encoding=NULL, - char nFormat=1) const; - /**< If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element with appropriate white spaces and carriage returns. 
- * If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8". - * If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS". - * If "_XMLWIDECHAR=1", then the "encoding" parameter is ignored and always set to "utf-16". - * If no "encoding" parameter is given the "ISO-8859-1" encoding is used. */ - /** @} */ - - /** @defgroup navigate Navigate the XMLNode structure - * @ingroup XMLParserGeneral - * @{ */ - XMLCSTR getName() const; ///< name of the node - XMLCSTR getText(int i=0) const; ///< return ith text field - int nText() const; ///< nbr of text field - XMLNode getParentNode() const; ///< return the parent node - XMLNode getChildNode(int i=0) const; ///< return ith child node - XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name. - XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing) - XMLNode getChildNodeWithAttribute(XMLCSTR tagName, - XMLCSTR attributeName, - XMLCSTR attributeValue=NULL, - int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing) - XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path - XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path. 
- - int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name - int nChildNode() const; ///< nbr of child node - XMLAttribute getAttribute(int i=0) const; ///< return ith attribute - XMLCSTR getAttributeName(int i=0) const; ///< return ith attribute name - XMLCSTR getAttributeValue(int i=0) const; ///< return ith attribute value - char isAttributeSet(XMLCSTR name) const; ///< test if an attribute with a specific name is given - XMLCSTR getAttribute(XMLCSTR name, int i) const; ///< return ith attribute content with specific name (return a NULL if failing) - XMLCSTR getAttribute(XMLCSTR name, int *i=NULL) const; ///< return next attribute content with specific name (return a NULL if failing) - int nAttribute() const; ///< nbr of attribute - XMLClear getClear(int i=0) const; ///< return ith clear field (comments) - int nClear() const; ///< nbr of clear field - XMLNodeContents enumContents(XMLElementPosition i) const; ///< enumerate all the different contents (attribute,child,text, clear) of the current XMLNode. The order is reflecting the order of the original file/string. NOTE: 0 <= i < nElement(); - int nElement() const; ///< nbr of different contents for current node - char isEmpty() const; ///< is this node Empty? - char isDeclaration() const; ///< is this node a declaration - XMLNode deepCopy() const; ///< deep copy (duplicate/clone) a XMLNode - static XMLNode emptyNode(); ///< return XMLNode::emptyXMLNode; - /** @} */ - - ~XMLNode(); - XMLNode(const XMLNode &A); ///< to allow shallow/fast copy: - XMLNode& operator=( const XMLNode& A ); ///< to allow shallow/fast copy: - - XMLNode(): d(NULL){}; - static XMLNode emptyXMLNode; - static XMLClear emptyXMLClear; - static XMLAttribute emptyXMLAttribute; - - /** @defgroup xmlModify Create or Update the XMLNode structure - * @ingroup XMLParserGeneral - * The functions in this group allows you to create from scratch (or update) a XMLNode structure. 
Start by creating your top - * node with the "createXMLTopNode" function and then add new nodes with the "addChild" function. The parameter 'pos' gives - * the position where the childNode, the text or the XMLClearTag will be inserted. The default value (pos=-1) inserts at the - * end. The value (pos=0) insert at the beginning (Insertion at the beginning is slower than at the end).
    - * - * REMARK: 0 <= pos < nChild()+nText()+nClear()
    - */ - - /** @defgroup creation Creating from scratch a XMLNode structure - * @ingroup xmlModify - * @{ */ - static XMLNode createXMLTopNode(XMLCSTR lpszName, char isDeclaration=FALSE); ///< Create the top node of an XMLNode structure - XMLNode addChild(XMLCSTR lpszName, char isDeclaration=FALSE, XMLElementPosition pos=-1); ///< Add a new child node - XMLNode addChild(XMLNode nodeToAdd, XMLElementPosition pos=-1); ///< If the "nodeToAdd" has some parents, it will be detached from it's parents before being attached to the current XMLNode - XMLAttribute *addAttribute(XMLCSTR lpszName, XMLCSTR lpszValuev); ///< Add a new attribute - XMLCSTR addText(XMLCSTR lpszValue, XMLElementPosition pos=-1); ///< Add a new text content - XMLClear *addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen=NULL, XMLCSTR lpszClose=NULL, XMLElementPosition pos=-1); - /**< Add a new clear tag - * @param lpszOpen default value "" - */ - /** @} */ - - /** @defgroup xmlUpdate Updating Nodes - * @ingroup xmlModify - * Some update functions: - * @{ - */ - XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name - XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added - XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added - XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added - XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a 
new one will be added - XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added - /** @} */ - - /** @defgroup xmlDelete Deleting Nodes or Attributes - * @ingroup xmlModify - * Some deletion functions: - * @{ - */ - /// The "deleteNodeContent" function forces the deletion of the content of this XMLNode and the subtree. - void deleteNodeContent(); - /**< \note The XMLNode instances that are referring to the part of the subtree that has been deleted CANNOT be used anymore!!. Unexpected results will occur if you continue using them. */ - void deleteAttribute(int i=0); ///< Delete the ith attribute of the current XMLNode - void deleteAttribute(XMLCSTR lpszName); ///< Delete the attribute with the given name (the "strcmp" function is used to find the right attribute) - void deleteAttribute(XMLAttribute *anAttribute); ///< Delete the attribute with the name "anAttribute->lpszName" (the "strcmp" function is used to find the right attribute) - void deleteText(int i=0); ///< Delete the Ith text content of the current XMLNode - void deleteText(XMLCSTR lpszValue); ///< Delete the text content "lpszValue" inside the current XMLNode (direct "pointer-to-pointer" comparison is used to find the right text) - void deleteClear(int i=0); ///< Delete the Ith clear tag inside the current XMLNode - void deleteClear(XMLCSTR lpszValue); ///< Delete the clear tag "lpszValue" inside the current XMLNode (direct "pointer-to-pointer" comparison is used to find the clear tag) - void deleteClear(XMLClear *p); ///< Delete the clear tag "p" inside the current XMLNode (direct "pointer-to-pointer" comparison on the lpszName of the clear tag is used to find the clear tag) - /** @} */ - - /** @defgroup xmlWOSD ???_WOSD functions. 
- * @ingroup xmlModify - * The strings given as parameters for the "add" and "update" methods that have a name with - * the postfix "_WOSD" (that means "WithOut String Duplication")(for example "addText_WOSD") - * will be free'd by the XMLNode class. For example, it means that this is incorrect: - * \code - * xNode.addText_WOSD("foo"); - * xNode.updateAttribute_WOSD("#newcolor" ,NULL,"color"); - * \endcode - * In opposition, this is correct: - * \code - * xNode.addText("foo"); - * xNode.addText_WOSD(stringDup("foo")); - * xNode.updateAttribute("#newcolor" ,NULL,"color"); - * xNode.updateAttribute_WOSD(stringDup("#newcolor"),NULL,"color"); - * \endcode - * Typically, you will never do: - * \code - * char *b=(char*)malloc(...); - * xNode.addText(b); - * free(b); - * \endcode - * ... but rather: - * \code - * char *b=(char*)malloc(...); - * xNode.addText_WOSD(b); - * \endcode - * ('free(b)' is performed by the XMLNode class) - * @{ */ - static XMLNode createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration=FALSE); ///< Create the top node of an XMLNode structure - XMLNode addChild_WOSD(XMLSTR lpszName, char isDeclaration=FALSE, XMLElementPosition pos=-1); ///< Add a new child node - XMLAttribute *addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValue); ///< Add a new attribute - XMLCSTR addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos=-1); ///< Add a new text content - XMLClear *addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen=NULL, XMLCSTR lpszClose=NULL, XMLElementPosition pos=-1); ///< Add a new clear Tag - - XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name - XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR 
lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added - XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added - XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added - XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added - /** @} */ - - /** @defgroup xmlPosition Position helper functions (use in conjunction with the update&add functions - * @ingroup xmlModify - * These are some useful functions when you want to insert a childNode, a text or a XMLClearTag in the - * middle (at a specified position) of a XMLNode tree already constructed. The value returned by these - * methods is to be used as last parameter (parameter 'pos') of addChild, addText or addClear. - * @{ */ - XMLElementPosition positionOfText(int i=0) const; - XMLElementPosition positionOfText(XMLCSTR lpszValue) const; - XMLElementPosition positionOfClear(int i=0) const; - XMLElementPosition positionOfClear(XMLCSTR lpszValue) const; - XMLElementPosition positionOfClear(XMLClear *a) const; - XMLElementPosition positionOfChildNode(int i=0) const; - XMLElementPosition positionOfChildNode(XMLNode x) const; - XMLElementPosition positionOfChildNode(XMLCSTR name, int i=0) const; ///< return the position of the ith childNode with the specified name if (name==NULL) return the position of the ith childNode - /** @} */ - - /// Enumeration for XML character encoding. 
- typedef enum XMLCharEncoding - { - char_encoding_error=0, - char_encoding_UTF8=1, - char_encoding_legacy=2, - char_encoding_ShiftJIS=3, - char_encoding_GB2312=4, - char_encoding_Big5=5, - char_encoding_GBK=6 // this is actually the same as Big5 - } XMLCharEncoding; - - /** \addtogroup conversions - * @{ */ - - /// Sets the global options for the conversions - static char setGlobalOptions(XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8, char guessWideCharChars=1, - char dropWhiteSpace=1, char removeCommentsInMiddleOfText=1); - /**< The "setGlobalOptions" function allows you to change four global parameters that affect string & file - * parsing. First of all, you most-probably will never have to change these 3 global parameters. - * - * @param guessWideCharChars If "guessWideCharChars"=1 and if this library is compiled in WideChar mode, then the - * XMLNode::parseFile and XMLNode::openFileHelper functions will test if the file contains ASCII - * characters. If this is the case, then the file will be loaded and converted in memory to - * WideChar before being parsed. If 0, no conversion will be performed. - * - * @param guessWideCharChars If "guessWideCharChars"=1 and if this library is compiled in ASCII/UTF8/char* mode, then the - * XMLNode::parseFile and XMLNode::openFileHelper functions will test if the file contains WideChar - * characters. If this is the case, then the file will be loaded and converted in memory to - * ASCII/UTF8/char* before being parsed. If 0, no conversion will be performed. - * - * @param characterEncoding This parameter is only meaningful when compiling in char* mode (multibyte character mode). - * In wchar_t* (wide char mode), this parameter is ignored. This parameter should be one of the - * three currently recognized encodings: XMLNode::encoding_UTF8, XMLNode::encoding_ascii, - * XMLNode::encoding_ShiftJIS. 
- * - * @param dropWhiteSpace In most situations, text fields containing only white spaces (and carriage returns) - * are useless. Even more, these "empty" text fields are annoying because they increase the - * complexity of the user's code for parsing. So, 99% of the time, it's better to drop - * the "empty" text fields. However The XML specification indicates that no white spaces - * should be lost when parsing the file. So to be perfectly XML-compliant, you should set - * dropWhiteSpace=0. A note of caution: if you set "dropWhiteSpace=0", the parser will be - * slower and your code will be more complex. - * - * @param removeCommentsInMiddleOfText To explain this parameter, let's consider this code: - * \code - * XMLNode x=XMLNode::parseString("foobarchu","a"); - * \endcode - * If removeCommentsInMiddleOfText=0, then we will have: - * \code - * x.getText(0) -> "foo" - * x.getText(1) -> "bar" - * x.getText(2) -> "chu" - * x.getClear(0) --> "" - * x.getClear(1) --> "" - * \endcode - * If removeCommentsInMiddleOfText=1, then we will have: - * \code - * x.getText(0) -> "foobar" - * x.getText(1) -> "chu" - * x.getClear(0) --> "" - * \endcode - * - * \return "0" when there are no errors. If you try to set an unrecognized encoding then the return value will be "1" to signal an error. - * - * \note Sometime, it's useful to set "guessWideCharChars=0" to disable any conversion - * because the test to detect the file-type (ASCII/UTF8/char* or WideChar) may fail (rarely). */ - - /// Guess the character encoding of the string (ascii, utf8 or shift-JIS) - static XMLCharEncoding guessCharEncoding(void *buffer, int bufLen, char useXMLEncodingAttribute=1); - /**< The "guessCharEncoding" function try to guess the character encoding. You most-probably will never - * have to use this function. It then returns the appropriate value of the global parameter - * "characterEncoding" described in the XMLNode::setGlobalOptions. 
The guess is based on the content of a buffer of length - * "bufLen" bytes that contains the first bytes (minimum 25 bytes; 200 bytes is a good value) of the - * file to be parsed. The XMLNode::openFileHelper function is using this function to automatically compute - * the value of the "characterEncoding" global parameter. There are several heuristics used to do the - * guess. One of the heuristic is based on the "encoding" attribute. The original XML specifications - * forbids to use this attribute to do the guess but you can still use it if you set - * "useXMLEncodingAttribute" to 1 (this is the default behavior and the behavior of most parsers). - * If an inconsistency in the encoding is detected, then the return value is "0". */ - /** @} */ - - private: - // these are functions and structures used internally by the XMLNode class (don't bother about them): - - typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete): - { - XMLCSTR lpszName; // Element name (=NULL if root) - int nChild, // Number of child nodes - nText, // Number of text fields - nClear, // Number of Clear fields (comments) - nAttribute; // Number of attributes - char isDeclaration; // Whether node is an XML declaration - '' - struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) - XMLNode *pChild; // Array of child nodes - XMLCSTR *pText; // Array of text fields - XMLClear *pClear; // Array of clear fields - XMLAttribute *pAttribute; // Array of attributes - int *pOrder; // order of the child_nodes,text_fields,clear_fields - int ref_count; // for garbage collection (smart pointers) - } XMLNodeData; - XMLNodeData *d; - - char parseClearTag(void *px, void *pa); - char maybeAddTxT(void *pa, XMLCSTR tokenPStr); - int ParseXMLElement(void *pXML); - void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); - int indexText(XMLCSTR lpszValue) const; - int indexClear(XMLCSTR lpszValue) const; - 
XMLNode addChild_priv(int,XMLSTR,char,int); - XMLAttribute *addAttribute_priv(int,XMLSTR,XMLSTR); - XMLCSTR addText_priv(int,XMLSTR,int); - XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int); - void emptyTheNode(char force); - static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype); - static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); - static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); - static void exactMemory(XMLNodeData *d); - static int detachFromParent(XMLNodeData *d); +typedef struct XMLDLLENTRY XMLNode { +private: + struct XMLNodeDataTag; + + /// Constructors are protected, so use instead one of: XMLNode::parseString, + /// XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode + XMLNode(struct XMLNodeDataTag *pParent, XMLSTR lpszName, char isDeclaration); + /// Constructors are protected, so use instead one of: XMLNode::parseString, + /// XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode + XMLNode(struct XMLNodeDataTag *p); + +public: + static XMLCSTR getVersion(); ///< Return the XMLParser library version number + + /** @defgroup conversions Parsing XML files/strings to an XMLNode structure + * and Rendering XMLNode's to files/string. + * @ingroup XMLParserGeneral + * @{ */ + + /// Parse an XML string and return the root of a XMLNode tree representing the + /// string. + static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); + /**< The "parseString" function parse an XML string and return the root of a + * XMLNode tree. The "opposite" of this function is the function + * "createXMLString" that re-creates an XML string from an XMLNode tree. If + * the XML document is corrupted, the "parseString" method will initialize the + * "pResults" variable with some information that can be used to trace the + * error. 
If you still want to parse the file, you can use the + * APPROXIMATE_PARSING option as explained inside the note at the beginning of + * the "xmlParser.cpp" file. + * + * @param lpXMLString the XML string to parse + * @param tag the name of the first tag inside the XML file. If the tag + * parameter is omitted, this function returns a node that represents the head + * of the xml document including the declaration term (). + * @param pResults a pointer to a XMLResults variable that will contain some + * information that can be used to trace the XML parsing error. You can have a + * user-friendly explanation of the parsing error with the "getError" + * function. + */ + + /// Parse an XML file and return the root of a XMLNode tree representing the + /// file. + static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); + /**< The "parseFile" function parse an XML file and return the root of a + * XMLNode tree. The "opposite" of this function is the function "writeToFile" + * that re-creates an XML file from an XMLNode tree. If the XML document is + * corrupted, the "parseFile" method will initialize the "pResults" variable + * with some information that can be used to trace the error. If you still + * want to parse the file, you can use the APPROXIMATE_PARSING option as + * explained inside the note at the beginning of the "xmlParser.cpp" file. + * + * @param filename the path to the XML file to parse + * @param tag the name of the first tag inside the XML file. If the tag + * parameter is omitted, this function returns a node that represents the head + * of the xml document including the declaration term (). + * @param pResults a pointer to a XMLResults variable that will contain some + * information that can be used to trace the XML parsing error. You can have a + * user-friendly explanation of the parsing error with the "getError" + * function. 
+ */ + + /// Parse an XML file and return the root of a XMLNode tree representing the + /// file. A very crude error checking is made. An attempt to guess the Char + /// Encoding used in the file is made. + static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL); + /**< The "openFileHelper" function reports to the screen all the warnings and + * errors that occurred during parsing of the XML file. This function also + * tries to guess char Encoding (UTF-8, ASCII or SHIT-JIS) based on the first + * 200 bytes of the file. Since each application has its own way to report and + * deal with errors, you should rather use the "parseFile" function to parse + * XML files and program yourself thereafter an "error reporting" tailored for + * your needs (instead of using the very crude "error reporting" mechanism + * included inside the "openFileHelper" function). + * + * If the XML document is corrupted, the "openFileHelper" method will: + * - display an error message on the console (or inside a messageBox + * for windows). + * - stop execution (exit). + * + * I strongly suggest that you write your own "openFileHelper" method tailored + * to your needs. If you still want to parse the file, you can use the + * APPROXIMATE_PARSING option as explained inside the note at the beginning of + * the "xmlParser.cpp" file. + * + * @param filename the path of the XML file to parse. + * @param tag the name of the first tag inside the XML file. If the tag + * parameter is omitted, this function returns a node that represents the head + * of the xml document including the declaration term (). + */ + + static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly + ///< explanation of the parsing error + + /// Create an XML string starting from the current XMLNode. + XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const; + /**< The returned string should be free'd using the "freeXMLString" function. 
+ * + * If nFormat==0, no formatting is required otherwise this returns an user + * friendly XML string from a given element + * with appropriate white spaces and carriage returns. if pnSize is given it + * returns the size in character of the string. */ + + /// Save the content of an xmlNode inside a file + XMLError writeToFile(XMLCSTR filename, const char *encoding = NULL, + char nFormat = 1) const; + /**< If nFormat==0, no formatting is required otherwise this returns an user + * friendly XML string from a given element with appropriate white spaces and + * carriage returns. If the global parameter + * "characterEncoding==encoding_UTF8", then the "encoding" parameter is + * ignored and always set to "utf-8". If the global parameter + * "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is + * ignored and always set to "SHIFT-JIS". If "_XMLWIDECHAR=1", then the + * "encoding" parameter is ignored and always set to "utf-16". If no + * "encoding" parameter is given the "ISO-8859-1" encoding is used. */ + /** @} */ + + /** @defgroup navigate Navigate the XMLNode structure + * @ingroup XMLParserGeneral + * @{ */ + XMLCSTR getName() const; ///< name of the node + XMLCSTR getText(int i = 0) const; ///< return ith text field + int nText() const; ///< nbr of text field + XMLNode getParentNode() const; ///< return the parent node + XMLNode getChildNode(int i = 0) const; ///< return ith child node + XMLNode getChildNode(XMLCSTR name, int i) + const; ///< return ith child node with specific name (return an empty node + ///< if failing). If i==-1, this returns the last XMLNode with the + ///< given name. 
+ XMLNode + getChildNode(XMLCSTR name, + int *i = NULL) const; ///< return next child node with specific + ///< name (return an empty node if failing) + XMLNode getChildNodeWithAttribute( + XMLCSTR tagName, XMLCSTR attributeName, XMLCSTR attributeValue = NULL, + int *i = NULL) const; ///< return child node with specific name/attribute + ///< (return an empty node if failing) + XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing = 0, + XMLCHAR sep = '/'); + ///< return the first child node with specific path + XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing = 0, + XMLCHAR sep = '/'); + ///< return the first child node with specific path. + + int nChildNode(XMLCSTR name) + const; ///< return the number of child node with specific name + int nChildNode() const; ///< nbr of child node + XMLAttribute getAttribute(int i = 0) const; ///< return ith attribute + XMLCSTR getAttributeName(int i = 0) const; ///< return ith attribute name + XMLCSTR getAttributeValue(int i = 0) const; ///< return ith attribute value + char isAttributeSet(XMLCSTR name) + const; ///< test if an attribute with a specific name is given + XMLCSTR + getAttribute(XMLCSTR name, + int i) const; ///< return ith attribute content with specific + ///< name (return a NULL if failing) + XMLCSTR getAttribute(XMLCSTR name, int *i = NULL) + const; ///< return next attribute content with specific name (return a + ///< NULL if failing) + int nAttribute() const; ///< nbr of attribute + XMLClear getClear(int i = 0) const; ///< return ith clear field (comments) + int nClear() const; ///< nbr of clear field + XMLNodeContents enumContents(XMLElementPosition i) + const; ///< enumerate all the different contents (attribute,child,text, + ///< clear) of the current XMLNode. The order is reflecting the + ///< order of the original file/string. 
NOTE: 0 <= i < nElement(); + int nElement() const; ///< nbr of different contents for current node + char isEmpty() const; ///< is this node Empty? + char isDeclaration() const; ///< is this node a declaration + XMLNode deepCopy() const; ///< deep copy (duplicate/clone) a XMLNode + static XMLNode emptyNode(); ///< return XMLNode::emptyXMLNode; + /** @} */ + + ~XMLNode(); + XMLNode(const XMLNode &A); ///< to allow shallow/fast copy: + XMLNode &operator=(const XMLNode &A); ///< to allow shallow/fast copy: + + XMLNode() : d(NULL){}; + static XMLNode emptyXMLNode; + static XMLClear emptyXMLClear; + static XMLAttribute emptyXMLAttribute; + + /** @defgroup xmlModify Create or Update the XMLNode structure + * @ingroup XMLParserGeneral + * The functions in this group allows you to create from scratch (or update) + * a XMLNode structure. Start by creating your top node with the + * "createXMLTopNode" function and then add new nodes with the "addChild" + * function. The parameter 'pos' gives the position where the childNode, the + * text or the XMLClearTag will be inserted. The default value (pos=-1) + * inserts at the end. The value (pos=0) insert at the beginning (Insertion at + * the beginning is slower than at the end).
    + * + * REMARK: 0 <= pos < nChild()+nText()+nClear()
    + */ + + /** @defgroup creation Creating from scratch a XMLNode structure + * @ingroup xmlModify + * @{ */ + static XMLNode + createXMLTopNode(XMLCSTR lpszName, + char isDeclaration = + FALSE); ///< Create the top node of an XMLNode structure + XMLNode addChild(XMLCSTR lpszName, char isDeclaration = FALSE, + XMLElementPosition pos = -1); ///< Add a new child node + XMLNode addChild(XMLNode nodeToAdd, + XMLElementPosition pos = + -1); ///< If the "nodeToAdd" has some parents, it will be + ///< detached from it's parents before being + ///< attached to the current XMLNode + XMLAttribute *addAttribute(XMLCSTR lpszName, + XMLCSTR lpszValuev); ///< Add a new attribute + XMLCSTR addText(XMLCSTR lpszValue, + XMLElementPosition pos = -1); ///< Add a new text content + XMLClear *addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen = NULL, + XMLCSTR lpszClose = NULL, XMLElementPosition pos = -1); + /**< Add a new clear tag + * @param lpszOpen default value "" + */ + /** @} */ + + /** @defgroup xmlUpdate Updating Nodes + * @ingroup xmlModify + * Some update functions: + * @{ + */ + XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name + XMLAttribute *updateAttribute( + XMLAttribute *newAttribute, + XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a + ///< new one will be added + XMLAttribute * + updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName = NULL, + int i = 0); ///< if the attribute to update is missing, a new + ///< one will be added + XMLAttribute *updateAttribute( + XMLCSTR lpszNewValue, XMLCSTR lpszNewName, + XMLCSTR + lpszOldName); ///< set lpszNewName=NULL if you don't want to change + ///< the name of the attribute if the attribute to + ///< update is missing, a new one will be added + XMLCSTR updateText( + XMLCSTR lpszNewValue, + int i = 0); ///< if the text to update is missing, a new one will be added + XMLCSTR + updateText(XMLCSTR lpszNewValue, + XMLCSTR lpszOldValue); ///< if the text to update is missing, a new + ///< one 
will be added + XMLClear *updateClear(XMLCSTR lpszNewContent, + int i = 0); ///< if the clearTag to update is missing, a + ///< new one will be added + XMLClear *updateClear(XMLClear *newP, + XMLClear *oldP); ///< if the clearTag to update is + ///< missing, a new one will be added + XMLClear * + updateClear(XMLCSTR lpszNewValue, + XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, + ///< a new one will be added + /** @} */ + + /** @defgroup xmlDelete Deleting Nodes or Attributes + * @ingroup xmlModify + * Some deletion functions: + * @{ + */ + /// The "deleteNodeContent" function forces the deletion of the content of + /// this XMLNode and the subtree. + void deleteNodeContent(); + /**< \note The XMLNode instances that are referring to the part of the subtree + * that has been deleted CANNOT be used anymore!!. Unexpected results will + * occur if you continue using them. */ + void deleteAttribute( + int i = 0); ///< Delete the ith attribute of the current XMLNode + void + deleteAttribute(XMLCSTR lpszName); ///< Delete the attribute with the given + ///< name (the "strcmp" function is used to + ///< find the right attribute) + void deleteAttribute( + XMLAttribute + *anAttribute); ///< Delete the attribute with the name + ///< "anAttribute->lpszName" (the "strcmp" function is + ///< used to find the right attribute) + void + deleteText(int i = 0); ///< Delete the Ith text content of the current XMLNode + void deleteText( + XMLCSTR lpszValue); ///< Delete the text content "lpszValue" inside the + ///< current XMLNode (direct "pointer-to-pointer" + ///< comparison is used to find the right text) + void deleteClear( + int i = 0); ///< Delete the Ith clear tag inside the current XMLNode + void deleteClear( + XMLCSTR lpszValue); ///< Delete the clear tag "lpszValue" inside the + ///< current XMLNode (direct "pointer-to-pointer" + ///< comparison is used to find the clear tag) + void deleteClear( + XMLClear *p); ///< Delete the clear tag "p" inside the 
current XMLNode + ///< (direct "pointer-to-pointer" comparison on the lpszName + ///< of the clear tag is used to find the clear tag) + /** @} */ + + /** @defgroup xmlWOSD ???_WOSD functions. + * @ingroup xmlModify + * The strings given as parameters for the "add" and "update" methods that + * have a name with the postfix "_WOSD" (that means "WithOut String + * Duplication")(for example "addText_WOSD") will be free'd by the XMLNode + * class. For example, it means that this is incorrect: \code + * xNode.addText_WOSD("foo"); + * xNode.updateAttribute_WOSD("#newcolor" ,NULL,"color"); + * \endcode + * In opposition, this is correct: + * \code + * xNode.addText("foo"); + * xNode.addText_WOSD(stringDup("foo")); + * xNode.updateAttribute("#newcolor" ,NULL,"color"); + * xNode.updateAttribute_WOSD(stringDup("#newcolor"),NULL,"color"); + * \endcode + * Typically, you will never do: + * \code + * char *b=(char*)malloc(...); + * xNode.addText(b); + * free(b); + * \endcode + * ... but rather: + * \code + * char *b=(char*)malloc(...); + * xNode.addText_WOSD(b); + * \endcode + * ('free(b)' is performed by the XMLNode class) + * @{ */ + static XMLNode createXMLTopNode_WOSD( + XMLSTR lpszName, + char isDeclaration = + FALSE); ///< Create the top node of an XMLNode structure + XMLNode addChild_WOSD(XMLSTR lpszName, char isDeclaration = FALSE, + XMLElementPosition pos = -1); ///< Add a new child node + XMLAttribute *addAttribute_WOSD(XMLSTR lpszName, + XMLSTR lpszValue); ///< Add a new attribute + XMLCSTR addText_WOSD(XMLSTR lpszValue, + XMLElementPosition pos = -1); ///< Add a new text content + XMLClear *addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen = NULL, + XMLCSTR lpszClose = NULL, + XMLElementPosition pos = -1); ///< Add a new clear Tag + + XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name + XMLAttribute *updateAttribute_WOSD( + XMLAttribute *newAttribute, + XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a + ///< new one will be 
added + XMLAttribute * + updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName = NULL, + int i = 0); ///< if the attribute to update is missing, a + ///< new one will be added + XMLAttribute *updateAttribute_WOSD( + XMLSTR lpszNewValue, XMLSTR lpszNewName, + XMLCSTR + lpszOldName); ///< set lpszNewName=NULL if you don't want to change + ///< the name of the attribute if the attribute to + ///< update is missing, a new one will be added + XMLCSTR updateText_WOSD( + XMLSTR lpszNewValue, + int i = 0); ///< if the text to update is missing, a new one will be added + XMLCSTR + updateText_WOSD(XMLSTR lpszNewValue, + XMLCSTR lpszOldValue); ///< if the text to update is missing, + ///< a new one will be added + XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, + int i = 0); ///< if the clearTag to update is + ///< missing, a new one will be added + XMLClear * + updateClear_WOSD(XMLClear *newP, + XMLClear *oldP); ///< if the clearTag to update is missing, a + ///< new one will be added + XMLClear * + updateClear_WOSD(XMLSTR lpszNewValue, + XMLCSTR lpszOldValue); ///< if the clearTag to update is + ///< missing, a new one will be added + /** @} */ + + /** @defgroup xmlPosition Position helper functions (use in conjunction with + * the update&add functions + * @ingroup xmlModify + * These are some useful functions when you want to insert a childNode, a text + * or a XMLClearTag in the middle (at a specified position) of a XMLNode tree + * already constructed. The value returned by these methods is to be used as + * last parameter (parameter 'pos') of addChild, addText or addClear. 
+ * @{ */ + XMLElementPosition positionOfText(int i = 0) const; + XMLElementPosition positionOfText(XMLCSTR lpszValue) const; + XMLElementPosition positionOfClear(int i = 0) const; + XMLElementPosition positionOfClear(XMLCSTR lpszValue) const; + XMLElementPosition positionOfClear(XMLClear *a) const; + XMLElementPosition positionOfChildNode(int i = 0) const; + XMLElementPosition positionOfChildNode(XMLNode x) const; + XMLElementPosition positionOfChildNode(XMLCSTR name, int i = 0) + const; ///< return the position of the ith childNode with the specified + ///< name if (name==NULL) return the position of the ith childNode + /** @} */ + + /// Enumeration for XML character encoding. + typedef enum XMLCharEncoding { + char_encoding_error = 0, + char_encoding_UTF8 = 1, + char_encoding_legacy = 2, + char_encoding_ShiftJIS = 3, + char_encoding_GB2312 = 4, + char_encoding_Big5 = 5, + char_encoding_GBK = 6 // this is actually the same as Big5 + } XMLCharEncoding; + + /** \addtogroup conversions + * @{ */ + + /// Sets the global options for the conversions + static char setGlobalOptions( + XMLCharEncoding characterEncoding = XMLNode::char_encoding_UTF8, + char guessWideCharChars = 1, char dropWhiteSpace = 1, + char removeCommentsInMiddleOfText = 1); + /**< The "setGlobalOptions" function allows you to change four global + * parameters that affect string & file parsing. First of all, you + * most-probably will never have to change these four global parameters. + * + * @param guessWideCharChars If "guessWideCharChars"=1 and if this library is + * compiled in WideChar mode, then the XMLNode::parseFile and + * XMLNode::openFileHelper functions will test if the file contains ASCII + * characters. If this is the case, then the file will be loaded and + * converted in memory to WideChar before being parsed. If 0, no conversion + * will be performed. 
+ * + * @param guessWideCharChars If "guessWideCharChars"=1 and if this library is + * compiled in ASCII/UTF8/char* mode, then the XMLNode::parseFile and + * XMLNode::openFileHelper functions will test if the file contains WideChar + * characters. If this is the case, then the file will be loaded and + * converted in memory to ASCII/UTF8/char* before being parsed. If 0, no + * conversion will be performed. + * + * @param characterEncoding This parameter is only meaningful when compiling + * in char* mode (multibyte character mode). In wchar_t* (wide char mode), + * this parameter is ignored. This parameter should be one of the + * XMLNode::XMLCharEncoding enumerators, e.g. XMLNode::char_encoding_UTF8, + * XMLNode::char_encoding_legacy or XMLNode::char_encoding_ShiftJIS. + * + * @param dropWhiteSpace In most situations, text fields containing only white + * spaces (and carriage returns) are useless. Even more, these "empty" text + * fields are annoying because they increase the complexity of the user's code + * for parsing. So, 99% of the time, it's better to drop the "empty" text + * fields. However, the XML specification indicates that no white spaces should + * be lost when parsing the file. So to be perfectly XML-compliant, you should + * set dropWhiteSpace=0. A note of caution: if you set "dropWhiteSpace=0", the + * parser will be slower and your code will be more complex. + * + * @param removeCommentsInMiddleOfText To explain this parameter, let's + * consider this code: \code XMLNode x=XMLNode::parseString("<a>foo<!-- hello -->bar<!DOCTYPE world >chu</a>","a"); \endcode If + * removeCommentsInMiddleOfText=0, then we will have: \code x.getText(0) -> + * "foo" x.getText(1) -> "bar" x.getText(2) -> "chu" x.getClear(0) --> "<!-- hello -->" x.getClear(1) --> "<!DOCTYPE world >" \endcode If + * removeCommentsInMiddleOfText=1, then we will have: \code x.getText(0) -> + * "foobar" x.getText(1) -> "chu" x.getClear(0) --> "<!DOCTYPE world >" + * \endcode + * + * \return "0" when there are no errors. 
If you try to set an unrecognized + * encoding then the return value will be "1" to signal an error. + * + * \note Sometimes, it's useful to set "guessWideCharChars=0" to disable any + * conversion because the test to detect the file-type (ASCII/UTF8/char* or + * WideChar) may fail (rarely). */ + + /// Guess the character encoding of the string (ascii, utf8 or shift-JIS) + static XMLCharEncoding guessCharEncoding(void *buffer, int bufLen, + char useXMLEncodingAttribute = 1); + /**< The "guessCharEncoding" function tries to guess the character encoding. You + * most-probably will never have to use this function. It then returns the + * appropriate value of the global parameter "characterEncoding" described in + * the XMLNode::setGlobalOptions. The guess is based on the content of a + * buffer of length "bufLen" bytes that contains the first bytes (minimum 25 + * bytes; 200 bytes is a good value) of the file to be parsed. The + * XMLNode::openFileHelper function is using this function to automatically + * compute the value of the "characterEncoding" global parameter. There are + * several heuristics used to do the guess. One of the heuristics is based on + * the "encoding" attribute. The original XML specification forbids using + * this attribute to do the guess but you can still use it if you set + * "useXMLEncodingAttribute" to 1 (this is the default behavior and the + * behavior of most parsers). + * If an inconsistency in the encoding is detected, then the return value is + * "0". 
*/ + /** @} */ + +private: + // these are functions and structures used internally by the XMLNode class + // (don't bother about them): + + typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" + // pointers (automatic delete): + { + XMLCSTR lpszName; // Element name (=NULL if root) + int nChild, // Number of child nodes + nText, // Number of text fields + nClear, // Number of Clear fields (comments) + nAttribute; // Number of attributes + char isDeclaration; // Whether node is an XML declaration - '' + struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) + XMLNode *pChild; // Array of child nodes + XMLCSTR *pText; // Array of text fields + XMLClear *pClear; // Array of clear fields + XMLAttribute *pAttribute; // Array of attributes + int *pOrder; // order of the child_nodes,text_fields,clear_fields + int ref_count; // for garbage collection (smart pointers) + } XMLNodeData; + XMLNodeData *d; + + char parseClearTag(void *px, void *pa); + char maybeAddTxT(void *pa, XMLCSTR tokenPStr); + int ParseXMLElement(void *pXML); + void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, + XMLElementType xtype); + int indexText(XMLCSTR lpszValue) const; + int indexClear(XMLCSTR lpszValue) const; + XMLNode addChild_priv(int, XMLSTR, char, int); + XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR); + XMLCSTR addText_priv(int, XMLSTR, int); + XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int); + void emptyTheNode(char force); + static inline XMLElementPosition findPosition(XMLNodeData *d, int index, + XMLElementType xtype); + static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, + int nFormat); + static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); + static void exactMemory(XMLNodeData *d); + static int detachFromParent(XMLNodeData *d); } XMLNode; /// This structure is given by the function XMLNode::enumContents. 
-typedef struct XMLNodeContents -{ - /// This dictates what's the content of the XMLNodeContent - enum XMLElementType etype; - /**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */ - XMLNode child; - XMLAttribute attrib; - XMLCSTR text; - XMLClear clear; +typedef struct XMLNodeContents { + /// This dictates what's the content of the XMLNodeContent + enum XMLElementType etype; + /**< should be an union to access the appropriate data. Compiler does not + * allow union of object with constructor... too bad. */ + XMLNode child; + XMLAttribute attrib; + XMLCSTR text; + XMLClear clear; } XMLNodeContents; -/** @defgroup StringAlloc String Allocation/Free functions - * @ingroup xmlModify - * @{ */ +/** @defgroup StringAlloc String Allocation/Free functions + * @ingroup xmlModify + * @{ */ /// Duplicate (copy in a new allocated buffer) the source string. -XMLDLLENTRY XMLSTR stringDup(XMLCSTR source, int cbData=-1); -/**< This is - * a very handy function when used with all the "XMLNode::*_WOSD" functions (\link xmlWOSD \endlink). - * @param cbData If !=0 then cbData is the number of chars to duplicate. New strings allocated with - * this function should be free'd using the "freeXMLString" function. */ - -/// to free the string allocated inside the "stringDup" function or the "createXMLString" function. +XMLDLLENTRY XMLSTR stringDup(XMLCSTR source, int cbData = -1); +/**< This is + * a very handy function when used with all the "XMLNode::*_WOSD" functions + * (\link xmlWOSD \endlink). + * @param cbData If !=0 then cbData is the number of chars to duplicate. New + * strings allocated with this function should be free'd using the + * "freeXMLString" function. */ + +/// to free the string allocated inside the "stringDup" function or the +/// "createXMLString" function. XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);} -/** @} */ +/** @} */ /** @defgroup atoX ato? 
like functions - * @ingroup XMLParserGeneral + * @ingroup XMLParserGeneral * The "xmlto?" functions are equivalents to the atoi, atol, atof functions. - * The only difference is: If the variable "xmlString" is NULL, than the return value - * is "defautValue". These 6 functions are only here as "convenience" functions for the - * user (they are not used inside the XMLparser). If you don't need them, you can - * delete them without any trouble. + * The only difference is: If the variable "xmlString" is NULL, than the return + * value is "defautValue". These 6 functions are only here as "convenience" + * functions for the user (they are not used inside the XMLparser). If you don't + * need them, you can delete them without any trouble. * * @{ */ -XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0); -XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0); -XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0); -XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0); -XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML("")); -XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0')); +XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue = 0); +XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue = 0); +XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue = 0); +XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue = .0); +XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue = _CXML("")); +XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, + XMLCHAR defautValue = _CXML('\0')); /** @} */ -/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions. - * @ingroup XMLParserGeneral +/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", + * "fprintf", "cout",... functions. + * @ingroup XMLParserGeneral * @{ */ -/// Helper class to create XML files using "printf", "fprintf", "cout",... 
functions. -/** The ToXMLStringTool class helps you creating XML files using "printf", "fprintf", "cout",... functions. - * The "ToXMLStringTool" class is processing strings so that all the characters +/// Helper class to create XML files using "printf", "fprintf", "cout",... +/// functions. +/** The ToXMLStringTool class helps you creating XML files using "printf", + * "fprintf", "cout",... functions. The "ToXMLStringTool" class is processing + * strings so that all the characters * &,",',<,> are replaced by their XML equivalent: * \verbatim &, ", ', <, > \endverbatim - * Using the "ToXMLStringTool class" and the "fprintf function" is THE most efficient - * way to produce VERY large XML documents VERY fast. - * \note If you are creating from scratch an XML file using the provided XMLNode class - * you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the + * Using the "ToXMLStringTool class" and the "fprintf function" is THE most + * efficient way to produce VERY large XML documents VERY fast. \note If you are + * creating from scratch an XML file using the provided XMLNode class you must + * not use the "ToXMLStringTool" class (because the "XMLNode" class does the * processing job for you during rendering).*/ -typedef struct XMLDLLENTRY ToXMLStringTool -{ +typedef struct XMLDLLENTRY ToXMLStringTool { public: - ToXMLStringTool(): buf(NULL),buflen(0){} - ~ToXMLStringTool(); - void freeBuffer();/// Date: Mon, 8 Jun 2020 23:30:02 -0500 Subject: [PATCH 03/59] CMake: Converted from Make to CMake Switched to CMake so that CI tools can be used. 
--- .gitignore | 3 + CMakeLists.txt | 61 ++++++++++++++ cacti/CMakeLists.txt | 45 ++++++++++ cacti/cacti.mk | 51 ------------ cacti/makefile | 28 ------- cacti/nuca.cc | 2 +- cacti/nuca.h | 2 +- cacti/powergating.cc | 7 +- makefile | 28 ------- mcpat.mk | 82 ------------------- processor.cc | 30 +++---- .../golden/{T1.golden => Niagara1.golden} | 0 ...C_64.golden => Niagara1_sharing_DC.golden} | 0 ..._64.golden => Niagara1_sharing_SBT.golden} | 0 ...T_64.golden => Niagara1_sharing_ST.golden} | 0 .../golden/{T2.golden => Niagara2.golden} | 0 unit_test/unit_test.py | 2 +- 17 files changed, 131 insertions(+), 210 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 cacti/CMakeLists.txt delete mode 100644 cacti/cacti.mk delete mode 100644 cacti/makefile delete mode 100644 makefile delete mode 100644 mcpat.mk rename unit_test/golden/{T1.golden => Niagara1.golden} (100%) rename unit_test/golden/{T1_DC_64.golden => Niagara1_sharing_DC.golden} (100%) rename unit_test/golden/{T1_SBT_64.golden => Niagara1_sharing_SBT.golden} (100%) rename unit_test/golden/{T1_ST_64.golden => Niagara1_sharing_ST.golden} (100%) rename unit_test/golden/{T2.golden => Niagara2.golden} (100%) diff --git a/.gitignore b/.gitignore index f19f276..af60277 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,6 @@ obj_opt/ # Unit-Test unit_test/output + +# CMake +build* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f03f7d5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,61 @@ +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message(FATAL_ERROR "DO NOT BUILD in-tree.") +endif() + +cmake_minimum_required (VERSION 3.1) + +project(mcpat DESCRIPTION "Power Timing Area Calculator" + LANGUAGES CXX) + +set (MCPAT_VERSION_MAJOR 1) +set (MCPAT_VERSION_MINOR 3) +set (MCPAT_VERSION_PATCH 0) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +#set(CMAKE_CXX_FLAGS "-Wall -Wextra") +#set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") 
+#set(CMAKE_CXX_FLAGS_RELEASE "-O3") +#set(CMAKE_C_COMPILER clang) +#set(CMAKE_CXX_COMPILER clang++) + +add_definitions(-DNTHREADS=1) + +add_subdirectory(cacti) + +add_executable(mcpat + XML_Parse.h + arch_const.h + array.h + basic_components.h + core.h + globalvar.h + interconnect.h + iocontrollers.h + logic.h + memoryctrl.h + noc.h + processor.h + sharedcache.h + version.h + xmlParser.h + XML_Parse.cc + array.cc + basic_components.cc + core.cc + interconnect.cc + iocontrollers.cc + logic.cc + main.cc + memoryctrl.cc + noc.cc + processor.cc + sharedcache.cc + xmlParser.cc +) + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) +target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads) diff --git a/cacti/CMakeLists.txt b/cacti/CMakeLists.txt new file mode 100644 index 0000000..d877551 --- /dev/null +++ b/cacti/CMakeLists.txt @@ -0,0 +1,45 @@ +add_library(cacti + Ucache.h + arbiter.h + area.h + bank.h + basic_circuit.h + cacti_interface.h + component.h + const.h + crossbar.h + decoder.h + htree2.h + io.h + mat.h + nuca.h + parameter.h + powergating.h + router.h + subarray.h + uca.h + version_cacti.h + wire.h + Ucache.cc + arbiter.cc + area.cc + bank.cc + basic_circuit.cc + cacti_interface.cc + component.cc + crossbar.cc + decoder.cc + htree2.cc + io.cc + main.cc + mat.cc + nuca.cc + parameter.cc + powergating.cc + router.cc + subarray.cc + technology.cc + uca.cc + wire.cc +) +target_include_directories(cacti PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/cacti/cacti.mk b/cacti/cacti.mk deleted file mode 100644 index 96d4ab9..0000000 --- a/cacti/cacti.mk +++ /dev/null @@ -1,51 +0,0 @@ -TARGET = cacti -SHELL = /bin/sh -.PHONY: all depend clean -.SUFFIXES: .cc .o - -ifndef NTHREADS - NTHREADS = 8 -endif - - -LIBS = -INCS = -lm - -ifeq ($(TAG),dbg) - DBG = -Wall - OPT = -ggdb -g -O0 -DNTHREADS=1 -gstabs+ -else - DBG = - OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -endif - -#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) 
-CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) -CXX = g++ -m32 -CC = gcc -m32 - -SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \ - decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc \ - cacti_interface.cc router.cc nuca.cc crossbar.cc arbiter.cc powergating.cc - -OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) -PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc -PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS)) -INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config - -all: obj_$(TAG)/$(TARGET) - cp -f obj_$(TAG)/$(TARGET) $(TARGET) - -obj_$(TAG)/$(TARGET) : $(OBJS) - $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread - -#obj_$(TAG)/%.o : %.cc -# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< - -obj_$(TAG)/%.o : %.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -clean: - -rm -f *.o _cacti.so cacti.py $(TARGET) - - diff --git a/cacti/makefile b/cacti/makefile deleted file mode 100644 index 2728691..0000000 --- a/cacti/makefile +++ /dev/null @@ -1,28 +0,0 @@ -TAR = cacti - -.PHONY: dbg opt depend clean clean_dbg clean_opt - -all: opt - -dbg: $(TAR).mk obj_dbg - @$(MAKE) TAG=dbg -C . -f $(TAR).mk - -opt: $(TAR).mk obj_opt - @$(MAKE) TAG=opt -C . -f $(TAR).mk - -obj_dbg: - mkdir $@ - -obj_opt: - mkdir $@ - -clean: clean_dbg clean_opt - -clean_dbg: obj_dbg - @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean - rm -rf $< - -clean_opt: obj_opt - @$(MAKE) TAG=opt -C . 
-f $(TAR).mk clean - rm -rf $< - diff --git a/cacti/nuca.cc b/cacti/nuca.cc index d9a0bee..ec20575 100644 --- a/cacti/nuca.cc +++ b/cacti/nuca.cc @@ -44,7 +44,7 @@ unsigned int MIN_BANKSIZE = 65536; int cont_stats[2 /*l2 or l3*/][5 /* cores */][ROUTER_TYPES][7 /*banks*/] [8 /* cycle time */]; -Nuca::Nuca(TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)) +Nuca::Nuca(TechnologyParameter::DeviceType *dt) : deviceType(dt) { init_cont(); } diff --git a/cacti/nuca.h b/cacti/nuca.h index 1e6f8bc..965b93b 100644 --- a/cacti/nuca.h +++ b/cacti/nuca.h @@ -70,7 +70,7 @@ class nuca_org_t { class Nuca : public Component { public: - Nuca(TechnologyParameter::DeviceType *dt); + Nuca(TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); void print_router(); ~Nuca(); void sim_nuca(); diff --git a/cacti/powergating.cc b/cacti/powergating.cc index cf0ec8b..04613f1 100644 --- a/cacti/powergating.cc +++ b/cacti/powergating.cc @@ -34,9 +34,9 @@ #include "area.h" #include "parameter.h" -#include +#include #include -#include +#include using namespace std; @@ -115,7 +115,7 @@ Sleep_tx::Sleep_tx(double _perf_with_sleep_tx, compute_penalty(); } -double Sleep_tx::compute_penalty() { +auto Sleep_tx::compute_penalty() -> double { // V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although // it might be OK to use sleep tx to control the V_delta double c_load; @@ -155,4 +155,5 @@ double Sleep_tx::compute_penalty() { just the wakeup latency will be shorter than the wakeup time from full asleep. So, the sleep time and energy does not matter */ + return 0.0; } diff --git a/makefile b/makefile deleted file mode 100644 index 27f213f..0000000 --- a/makefile +++ /dev/null @@ -1,28 +0,0 @@ -TAR = mcpat - -.PHONY: dbg opt depend clean clean_dbg clean_opt - -all: opt - -dbg: $(TAR).mk obj_dbg - @$(MAKE) TAG=dbg -C . -f $(TAR).mk - -opt: $(TAR).mk obj_opt - @$(MAKE) TAG=opt -C . 
-f $(TAR).mk - -obj_dbg: - mkdir $@ - -obj_opt: - mkdir $@ - -clean: clean_dbg clean_opt - -clean_dbg: obj_dbg - @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean - rm -rf $< - -clean_opt: obj_opt - @$(MAKE) TAG=opt -C . -f $(TAR).mk clean - rm -rf $< - diff --git a/mcpat.mk b/mcpat.mk deleted file mode 100644 index f2c4fd5..0000000 --- a/mcpat.mk +++ /dev/null @@ -1,82 +0,0 @@ -TARGET = mcpat -SHELL = /bin/sh -.PHONY: all depend clean -.SUFFIXES: .cc .o - -ifndef NTHREADS - NTHREADS = 4 -endif - - -LIBS = -INCS = -lm - -ifeq ($(TAG),dbg) - DBG = -Wall - OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti -else - DBG = - OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti - #OPT = -O0 -DNTHREADS=$(NTHREADS) -endif - -#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) -CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) -CXX = g++ -m32 -CC = gcc -m32 - -VPATH = cacti - -SRCS = \ - Ucache.cc \ - XML_Parse.cc \ - arbiter.cc \ - area.cc \ - array.cc \ - bank.cc \ - basic_circuit.cc \ - basic_components.cc \ - cacti_interface.cc \ - component.cc \ - core.cc \ - crossbar.cc \ - decoder.cc \ - htree2.cc \ - interconnect.cc \ - io.cc \ - iocontrollers.cc \ - logic.cc \ - main.cc \ - mat.cc \ - memoryctrl.cc \ - noc.cc \ - nuca.cc \ - parameter.cc \ - processor.cc \ - router.cc \ - sharedcache.cc \ - subarray.cc \ - technology.cc \ - uca.cc \ - wire.cc \ - xmlParser.cc \ - powergating.cc - -OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) - -all: obj_$(TAG)/$(TARGET) - cp -f obj_$(TAG)/$(TARGET) $(TARGET) - -obj_$(TAG)/$(TARGET) : $(OBJS) - $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread - -#obj_$(TAG)/%.o : %.cc -# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< - -obj_$(TAG)/%.o : %.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -clean: - -rm -f *.o $(TARGET) - - diff --git a/processor.cc b/processor.cc index 8c5cdde..1f57c8b 100644 --- a/processor.cc +++ b/processor.cc @@ -38,16 +38,16 @@ #include "version.h" #include -#include +#include #include #include #include -#include -#include 
+#include +#include Processor::Processor(ParseXML *XML_interface) : XML(XML_interface), // TODO: using one global copy may have problems. - mc(0), niu(0), pcie(0), flashcontroller(0) { + mc(nullptr), niu(nullptr), pcie(nullptr), flashcontroller(nullptr) { /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory @@ -905,12 +905,12 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { void Processor::set_proc_param() { bool debug = false; - procdynp.homoCore = bool(debug ? 1 : XML->sys.homogeneous_cores); - procdynp.homoL2 = bool(debug ? 1 : XML->sys.homogeneous_L2s); - procdynp.homoL3 = bool(debug ? 1 : XML->sys.homogeneous_L3s); - procdynp.homoNOC = bool(debug ? 1 : XML->sys.homogeneous_NoCs); - procdynp.homoL1Dir = bool(debug ? 1 : XML->sys.homogeneous_L1Directories); - procdynp.homoL2Dir = bool(debug ? 1 : XML->sys.homogeneous_L2Directories); + procdynp.homoCore = bool(debug ? true : XML->sys.homogeneous_cores); + procdynp.homoL2 = bool(debug ? true : XML->sys.homogeneous_L2s); + procdynp.homoL3 = bool(debug ? true : XML->sys.homogeneous_L3s); + procdynp.homoNOC = bool(debug ? true : XML->sys.homogeneous_NoCs); + procdynp.homoL1Dir = bool(debug ? true : XML->sys.homogeneous_L1Directories); + procdynp.homoL2Dir = bool(debug ? 
true : XML->sys.homogeneous_L2Directories); procdynp.numCore = XML->sys.number_of_cores; procdynp.numL2 = XML->sys.number_of_L2s; @@ -986,7 +986,7 @@ void Processor::set_proc_param() { interface_ip.assoc = 1; interface_ip.nbanks = 1; interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.specific_tag = 1; + interface_ip.specific_tag = true; interface_ip.tag_w = 64; interface_ip.access_mode = 2; @@ -1054,18 +1054,18 @@ Processor::~Processor() { } if (mc) { delete mc; - mc = 0; + mc = nullptr; } if (niu) { delete niu; - niu = 0; + niu = nullptr; } if (pcie) { delete pcie; - pcie = 0; + pcie = nullptr; } if (flashcontroller) { delete flashcontroller; - flashcontroller = 0; + flashcontroller = nullptr; } }; diff --git a/unit_test/golden/T1.golden b/unit_test/golden/Niagara1.golden similarity index 100% rename from unit_test/golden/T1.golden rename to unit_test/golden/Niagara1.golden diff --git a/unit_test/golden/T1_DC_64.golden b/unit_test/golden/Niagara1_sharing_DC.golden similarity index 100% rename from unit_test/golden/T1_DC_64.golden rename to unit_test/golden/Niagara1_sharing_DC.golden diff --git a/unit_test/golden/T1_SBT_64.golden b/unit_test/golden/Niagara1_sharing_SBT.golden similarity index 100% rename from unit_test/golden/T1_SBT_64.golden rename to unit_test/golden/Niagara1_sharing_SBT.golden diff --git a/unit_test/golden/T1_ST_64.golden b/unit_test/golden/Niagara1_sharing_ST.golden similarity index 100% rename from unit_test/golden/T1_ST_64.golden rename to unit_test/golden/Niagara1_sharing_ST.golden diff --git a/unit_test/golden/T2.golden b/unit_test/golden/Niagara2.golden similarity index 100% rename from unit_test/golden/T2.golden rename to unit_test/golden/Niagara2.golden diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index 16059d3..b2ff167 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -107,7 +107,7 @@ def run_test(vector): stde = os.path.join(output_path, vector + ".err") with open(stdo, "w") as so, 
open(stde, "w") as se: p = subprocess.Popen([ - "../mcpat", "-infile", infile, "-print_level", "5", "-opt_for_clk", "1" + "../build/mcpat", "-infile", infile, "-print_level", "5", "-opt_for_clk", "1" ], stdout=so, stderr=se) From 6e6b3eb3ff92ac98c227effdd9743c3711cd66c1 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 08:54:47 -0500 Subject: [PATCH 04/59] git-hooks: Pre commit clang-format test --- CMakeLists.txt | 15 +++++++++------ array.cc | 2 +- cacti/nuca.cc | 3 +-- cacti/powergating.cc | 2 +- processor.cc | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f03f7d5..fddab45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) - message(FATAL_ERROR "DO NOT BUILD in-tree.") + message(FATAL_ERROR "Create a separate build directory") endif() cmake_minimum_required (VERSION 3.1) @@ -15,11 +15,14 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -#set(CMAKE_CXX_FLAGS "-Wall -Wextra") -#set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") -#set(CMAKE_CXX_FLAGS_RELEASE "-O3") -#set(CMAKE_C_COMPILER clang) -#set(CMAKE_CXX_COMPILER clang++) +#Including custom CMake rules +include(cmake/clang-cxx-dev-tools.cmake) + +set(CMAKE_CXX_FLAGS "-Wall -Wextra") +set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") +set(CMAKE_CXX_FLAGS_RELEASE "-O3") +set(CMAKE_C_COMPILER clang) +set(CMAKE_CXX_COMPILER clang++) add_definitions(-DNTHREADS=1) diff --git a/array.cc b/array.cc index 86f60d6..c12662c 100644 --- a/array.cc +++ b/array.cc @@ -209,7 +209,7 @@ void ArrayST::optimize_array() { // below //"< -#include #include +#include using namespace std; diff --git a/processor.cc b/processor.cc index 1f57c8b..cf9ba5a 100644 --- a/processor.cc +++ b/processor.cc @@ -40,10 +40,10 @@ #include #include #include -#include -#include #include #include +#include +#include Processor::Processor(ParseXML *XML_interface) : XML(XML_interface), // TODO: using one global copy 
may have problems. From 65beb917803ea90b1856e5b6873a5422afbe7940 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 10:31:55 -0500 Subject: [PATCH 05/59] git-hooks: Testing clang-format again Also cleaned up the build warnings --- .clang-format | 4 +- CMakeLists.txt | 5 +- cacti/arbiter.h | 2 +- cacti/crossbar.cc | 2 +- cacti/decoder.cc | 6 +- cacti/powergating.cc | 18 +-- cacti/powergating.h | 8 +- core.cc | 16 +- interconnect.cc | 2 +- logic.cc | 2 +- unit_test/unit_test.py | 3 +- unit_test/unit_test.sh | 53 +------ util/format.sh | 12 +- util/run-clang-tidy.py | 326 ----------------------------------------- 14 files changed, 45 insertions(+), 414 deletions(-) delete mode 100755 util/run-clang-tidy.py diff --git a/.clang-format b/.clang-format index 765a1ab..08fd74f 100644 --- a/.clang-format +++ b/.clang-format @@ -43,10 +43,10 @@ BraceWrapping: SplitEmptyNamespace: true BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach -BreakBeforeInheritanceComma: false +# BreakBeforeInheritanceComma: false BreakInheritanceList: BeforeColon BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false +# BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true diff --git a/CMakeLists.txt b/CMakeLists.txt index fddab45..e0a14fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,10 +15,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() -#Including custom CMake rules -include(cmake/clang-cxx-dev-tools.cmake) - -set(CMAKE_CXX_FLAGS "-Wall -Wextra") +set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function") set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_C_COMPILER clang) diff --git a/cacti/arbiter.h b/cacti/arbiter.h index c924a95..0a0acc9 100644 --- a/cacti/arbiter.h +++ b/cacti/arbiter.h @@ -64,7 +64,7 @@ class Arbiter : public Component { 
double NTtr, PTtr; double o_len; TechnologyParameter::DeviceType *deviceType; - double TriS1, TriS2; + // double TriS1, TriS2; double min_w_pmos, Vdd; }; diff --git a/cacti/crossbar.cc b/cacti/crossbar.cc index d6cf098..744a82d 100644 --- a/cacti/crossbar.cc +++ b/cacti/crossbar.cc @@ -166,7 +166,7 @@ void Crossbar::compute_power() { deviceType->Vth / deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE); - Wire wreset(); + Wire wreset(1, 1); } void Crossbar::print_crossbar() { diff --git a/cacti/decoder.cc b/cacti/decoder.cc index ace6156..7573058 100644 --- a/cacti/decoder.cc +++ b/cacti/decoder.cc @@ -48,8 +48,8 @@ Decoder::Decoder(int _num_dec_signals, bool flag_way_select, R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), // power(), fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), - total_driver_nwidth(0), total_driver_pwidth(0), cell(cell_), - power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { + total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), + cell(cell_), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { w_dec_n[i] = 0; @@ -1458,7 +1458,7 @@ Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), // power(), is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0), - power_gating(power_gating_), nodes_DSTN(nodes_DSTN_), sleeptx(NULL) { + sleeptx(NULL), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_n[i] = 0; width_p[i] = 0; diff --git a/cacti/powergating.cc b/cacti/powergating.cc index a2774b4..a0f62d6 100644 --- a/cacti/powergating.cc +++ b/cacti/powergating.cc @@ -73,19 +73,19 @@ Sleep_tx::Sleep_tx(double _perf_with_sleep_tx, double _active_Isat, // of circuit block, not sleep tx bool _is_footer, double _c_circuit_wakeup, double _V_delta, int _num_sleep_tx, - // 
double _vt_circuit, - // double _vt_sleep_tx, - // double _mobility,//of sleep tx - // double _c_ox,//of sleep tx + // double _vt_circuit, + // double _vt_sleep_tx, + // double _mobility,//of sleep tx + // double _c_ox,//of sleep tx const Area &cell_) : perf_with_sleep_tx(_perf_with_sleep_tx), active_Isat(_active_Isat), - is_footer(_is_footer), c_circuit_wakeup(_c_circuit_wakeup), - V_delta(_V_delta), num_sleep_tx(_num_sleep_tx), + is_footer(_is_footer), num_sleep_tx(_num_sleep_tx), + c_circuit_wakeup(_c_circuit_wakeup), // vt_circuit(_vt_circuit), // vt_sleep_tx(_vt_sleep_tx), // mobility(_mobility), // c_ox(_c_ox) - cell(cell_), is_sleep_tx(true) { + cell(cell_), is_sleep_tx(true), V_delta(_V_delta) { // a single sleep tx in a network double raw_area, raw_width, raw_hight; @@ -124,7 +124,7 @@ auto Sleep_tx::compute_penalty() -> double { if (is_footer) { c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false, is_sleep_tx); - // V_delta = _V_delta; + // V_delta = _V_delta; wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep) * V_delta / (simplified_nmos_Isat(width, false, false, false, is_sleep_tx) / @@ -136,7 +136,7 @@ auto Sleep_tx::compute_penalty() -> double { } else { c_intrinsic_sleep = drain_C_(width * p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false, is_sleep_tx); - // V_delta = _V_delta; + // V_delta = _V_delta; wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep) * V_delta / (simplified_pmos_Isat(width, false, false, false, is_sleep_tx) / diff --git a/cacti/powergating.h b/cacti/powergating.h index 72415d8..b2df692 100644 --- a/cacti/powergating.h +++ b/cacti/powergating.h @@ -40,10 +40,10 @@ class Sleep_tx : public Component { double _active_Isat, // of circuit block, not sleep tx bool _is_footer, double _c_circuit_wakeup, double _V_delta, int _num_sleep_tx, - // double _vt_circuit, - // double _vt_sleep_tx, - // double _mobility,//of sleep tx - // double _c_ox,//of sleep tx + // double _vt_circuit, + // double _vt_sleep_tx, 
+ // double _mobility,//of sleep tx + // double _c_ox,//of sleep tx const Area &cell_); double perf_with_sleep_tx; diff --git a/core.cc b/core.cc index eda0210..0ba101d 100644 --- a/core.cc +++ b/core.cc @@ -1728,7 +1728,7 @@ used for index the RAT entry to be updated. fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); area.set_area(area.get_area() + fFRAT->area.get_area()); - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { // FRAT tag = coredynp.arch_ireg_width + coredynp.hthread_width; data = int( @@ -2012,7 +2012,7 @@ used for index the RAT entry to be updated. fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); area.set_area(area.get_area() + fFRAT->area.get_area()); - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { // FRAT tag = coredynp.arch_ireg_width + coredynp.hthread_width; data = int(ceil( @@ -2953,7 +2953,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; fFRAT->tdp_stats = fFRAT->stats_t; - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; iFRAT->tdp_stats = iFRAT->stats_t; @@ -2992,7 +2992,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; fFRAT->tdp_stats = fFRAT->stats_t; - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; iFRAT->tdp_stats = iFRAT->stats_t; @@ -3043,7 +3043,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; fFRAT->rtp_stats = fFRAT->stats_t; - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { 
iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; iFRAT->rtp_stats = iFRAT->stats_t; @@ -3093,7 +3093,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { // fFRAT->stats_t.searchAc.access = // XML->sys.core[ithCore].committed_fp_instructions; fFRAT->rtp_stats = fFRAT->stats_t; - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; iFRAT->rtp_stats = iFRAT->stats_t; @@ -3167,7 +3167,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { fdcl->power.readOp.dynamic) + fFRAT->stats_t.writeAc.access * fFRAT->local_result.power.writeOp.dynamic); - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { iFRAT->power_t.reset(); fFRAT->power_t.reset(); iFRAT->power_t.readOp.dynamic += @@ -3229,7 +3229,7 @@ void RENAMINGU::computeEnergy(bool is_tdp) { fdcl->power.readOp.dynamic) + fFRAT->stats_t.writeAc.access * fFRAT->local_result.power.writeOp.dynamic); - } else if ((coredynp.rm_ty == CAMbased)) { + } else if (coredynp.rm_ty == CAMbased) { iFRAT->power_t.reset(); fFRAT->power_t.reset(); iFRAT->power_t.readOp.dynamic += diff --git a/interconnect.cc b/interconnect.cc index 19a5847..b186b63 100644 --- a/interconnect.cc +++ b/interconnect.cc @@ -151,7 +151,7 @@ interconnect::interconnect(string name_, enum Device_ty device_ty_, area.set_area(area.get_area() * route_over_perc + no_device_under_wire_area.get_area() * (1 - route_over_perc)); - Wire wreset(); + Wire wreset(1, 1); } void interconnect::compute() { diff --git a/logic.cc b/logic.cc index 8ae826e..452a43f 100644 --- a/logic.cc +++ b/logic.cc @@ -754,7 +754,7 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, void FunctionalUnit::computeEnergy(bool is_tdp) { double pppm_t[4] = {1, 1, 1, 1}; - double FU_duty_cycle; + 
double FU_duty_cycle = 0.0; if (is_tdp) { set_pppm(pppm_t, 2, 2, 2, 2); // 2 means two source operands needs to be diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index b2ff167..2fe2780 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -107,7 +107,8 @@ def run_test(vector): stde = os.path.join(output_path, vector + ".err") with open(stdo, "w") as so, open(stde, "w") as se: p = subprocess.Popen([ - "../build/mcpat", "-infile", infile, "-print_level", "5", "-opt_for_clk", "1" + "../build/mcpat", "-infile", infile, "-print_level", "5", + "-opt_for_clk", "1" ], stdout=so, stderr=se) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 3dbee0b..708510f 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -26,26 +26,6 @@ print_info () { echo -e "[ $script_name ] $1" } -print_pass () { - green="\e[32m" - nc="\e[0m" - echo -e "$green[ $script_name ] PASS:$nc $1" -} - -print_error () { - red="\e[31m" - nc="\e[0m" - echo -e "$red[ $script_name ] ERROR:$nc $1" -} - -print_test_results () { - green="\e[32m" - red="\e[31m" - nc="\e[0m" - echo -e "[ $script_name ] Passed $green$1$nc; Failed $red$2$nc; out of $3 Unit Tests" -} - - #-------------------------------------------------------------------- # Output Directories # ___ _ _ _____ ____ _ _ _____ ____ ___ ____ @@ -64,8 +44,6 @@ else rm -f $OUTPUT/* fi -GOLDEN="./golden" - #-------------------------------------------------------------------- # Run Tests # _____ _____ ____ _____ ____ @@ -75,33 +53,4 @@ GOLDEN="./golden" # |_| |_____|____/ |_| |____/ # #-------------------------------------------------------------------- -INPUT="./input" -PASS_COUNT=0 -TOTAL_COUNT=0 -FAIL_COUNT=0 -for t in $(ls $INPUT); do - test_name=$(basename $t .xml) - TOTAL_COUNT=$((TOTAL_COUNT + 1)) - ../mcpat -infile $INPUT/$test_name.xml -print_level 5 -opt_for_clk 1 > $OUTPUT/$test_name.out 2> $OUTPUT/$test_name.err - if [ -s $OUTPUT/$test_name.err ] || [ ! 
-s $OUTPUT/$test_name.out ]; - then - print_error "$test_name; check $OUTPUT/$test_name.err" - FAIL_COUNT=$((FAIL_COUNT + 1)) - else - if [ $(grep -rnI "nan\|inf" $OUTPUT/${test_name}.out | wc -l) -ne 0 ]; - then - print_pass "$test_name; nan, inf present in output; check $OUTPUT/$test_name.out" - FAIL_COUNT=$((FAIL_COUNT + 1)) - else - if [ $(diff $GOLDEN/$test_name.golden $OUTPUT/$test_name.out | wc -l) -eq 0 ]; - then - print_pass "$test_name" - PASS_COUNT=$((PASS_COUNT + 1)) - else - print_error "$test_name; output differs from golden output" - FAIL_COUNT=$((FAIL_COUNT + 1)) - fi - fi - fi -done -print_test_results $PASS_COUNT $FAIL_COUNT $TOTAL_COUNT +./unit_test.py diff --git a/util/format.sh b/util/format.sh index ab3c419..4fffc52 100755 --- a/util/format.sh +++ b/util/format.sh @@ -1,7 +1,17 @@ #!/bin/bash +SCRIPT="$(readlink -f $0)" +SCRIPT_PATH="$(dirname $SCRIPT)" +SRC_PATH="$SCRIPT_PATH/.." + # Format C Code: -find -name '*.cpp' -o -name '*.h' -o -name '*.hh' -o -name '*.c' -o -name '*.cc' | xargs clang-format -i --verbose +find $SRC_PATH -name '*.cpp' \ + -o -name '*.h' \ + -o -name '*.hh' \ + -o -name '*.c' \ + -o -name '*.cc' \ + | grep -vE "build" \ + | xargs clang-format -i --verbose # Format Python Code: yapf -ir -vv . diff --git a/util/run-clang-tidy.py b/util/run-clang-tidy.py deleted file mode 100755 index 1eb1352..0000000 --- a/util/run-clang-tidy.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python -# -#===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# -# FIXME: Integrate with clang-tidy-diff.py - -""" -Parallel clang-tidy runner -========================== - -Runs clang-tidy over all files in a compilation database. 
Requires clang-tidy -and clang-apply-replacements in $PATH. - -Example invocations. -- Run clang-tidy on all files in the current working directory with a default - set of checks and show warnings in the cpp files and all project headers. - run-clang-tidy.py $PWD - -- Fix all header guards. - run-clang-tidy.py -fix -checks=-*,llvm-header-guard - -- Fix all header guards included from clang-tidy and header guards - for clang-tidy headers. - run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ - -header-filter=extra/clang-tidy - -Compilation database setup: -http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html -""" - -from __future__ import print_function - -import argparse -import glob -import json -import multiprocessing -import os -import re -import shutil -import subprocess -import sys -import tempfile -import threading -import traceback - -try: - import yaml -except ImportError: - yaml = None - -is_py2 = sys.version[0] == '2' - -if is_py2: - import Queue as queue -else: - import queue as queue - -def find_compilation_database(path): - """Adjusts the directory until a compilation database is found.""" - result = './' - while not os.path.isfile(os.path.join(result, path)): - if os.path.realpath(result) == '/': - print('Error: could not find compilation database.') - sys.exit(1) - result += '../' - return os.path.realpath(result) - - -def make_absolute(f, directory): - if os.path.isabs(f): - return f - return os.path.normpath(os.path.join(directory, f)) - - -def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, - header_filter, extra_arg, extra_arg_before, quiet, - config): - """Gets a command line for clang-tidy.""" - start = [clang_tidy_binary] - if header_filter is not None: - start.append('-header-filter=' + header_filter) - if checks: - start.append('-checks=' + checks) - if tmpdir is not None: - start.append('-export-fixes') - # Get a temporary file. We immediately close the handle so clang-tidy can - # overwrite it. 
- (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) - os.close(handle) - start.append(name) - for arg in extra_arg: - start.append('-extra-arg=%s' % arg) - for arg in extra_arg_before: - start.append('-extra-arg-before=%s' % arg) - start.append('-p=' + build_path) - if quiet: - start.append('-quiet') - if config: - start.append('-config=' + config) - start.append(f) - return start - - -def merge_replacement_files(tmpdir, mergefile): - """Merge all replacement files in a directory into a single file""" - # The fixes suggested by clang-tidy >= 4.0.0 are given under - # the top level key 'Diagnostics' in the output yaml files - mergekey="Diagnostics" - merged=[] - for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): - content = yaml.safe_load(open(replacefile, 'r')) - if not content: - continue # Skip empty files. - merged.extend(content.get(mergekey, [])) - - if merged: - # MainSourceFile: The key is required by the definition inside - # include/clang/Tooling/ReplacementsYaml.h, but the value - # is actually never used inside clang-apply-replacements, - # so we set it to '' here. - output = { 'MainSourceFile': '', mergekey: merged } - with open(mergefile, 'w') as out: - yaml.safe_dump(output, out) - else: - # Empty the file: - open(mergefile, 'w').close() - - -def check_clang_apply_replacements_binary(args): - """Checks if invoking supplied clang-apply-replacements binary works.""" - try: - subprocess.check_call([args.clang_apply_replacements_binary, '--version']) - except: - print('Unable to run clang-apply-replacements. 
Is clang-apply-replacements ' - 'binary correctly specified?', file=sys.stderr) - traceback.print_exc() - sys.exit(1) - - -def apply_fixes(args, tmpdir): - """Calls clang-apply-fixes on a given directory.""" - invocation = [args.clang_apply_replacements_binary] - if args.format: - invocation.append('-format') - if args.style: - invocation.append('-style=' + args.style) - invocation.append(tmpdir) - subprocess.call(invocation) - - -def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): - """Takes filenames out of queue and runs clang-tidy on them.""" - while True: - name = queue.get() - invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, - tmpdir, build_path, args.header_filter, - args.extra_arg, args.extra_arg_before, - args.quiet, args.config) - - proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, err = proc.communicate() - if proc.returncode != 0: - failed_files.append(name) - with lock: - sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) - if len(err) > 0: - sys.stdout.flush() - sys.stderr.write(err.decode('utf-8')) - queue.task_done() - - -def main(): - parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' - 'in a compilation database. 
Requires ' - 'clang-tidy and clang-apply-replacements in ' - '$PATH.') - parser.add_argument('-clang-tidy-binary', metavar='PATH', - default='clang-tidy', - help='path to clang-tidy binary') - parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', - default='clang-apply-replacements', - help='path to clang-apply-replacements binary') - parser.add_argument('-checks', default=None, - help='checks filter, when not specified, use clang-tidy ' - 'default') - parser.add_argument('-config', default=None, - help='Specifies a configuration in YAML/JSON format: ' - ' -config="{Checks: \'*\', ' - ' CheckOptions: [{key: x, ' - ' value: y}]}" ' - 'When the value is empty, clang-tidy will ' - 'attempt to find a file named .clang-tidy for ' - 'each source file in its parent directories.') - parser.add_argument('-header-filter', default=None, - help='regular expression matching the names of the ' - 'headers to output diagnostics from. Diagnostics from ' - 'the main file of each translation unit are always ' - 'displayed.') - if yaml: - parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', - help='Create a yaml file to store suggested fixes in, ' - 'which can be applied with clang-apply-replacements.') - parser.add_argument('-j', type=int, default=0, - help='number of tidy instances to be run in parallel.') - parser.add_argument('files', nargs='*', default=['.*'], - help='files to be processed (regex on path)') - parser.add_argument('-fix', action='store_true', help='apply fix-its') - parser.add_argument('-format', action='store_true', help='Reformat code ' - 'after applying fixes') - parser.add_argument('-style', default='file', help='The style of reformat ' - 'code after applying fixes') - parser.add_argument('-p', dest='build_path', - help='Path used to read a compile command database.') - parser.add_argument('-extra-arg', dest='extra_arg', - action='append', default=[], - help='Additional argument to append to the compiler ' - 'command 
line.') - parser.add_argument('-extra-arg-before', dest='extra_arg_before', - action='append', default=[], - help='Additional argument to prepend to the compiler ' - 'command line.') - parser.add_argument('-quiet', action='store_true', - help='Run clang-tidy in quiet mode') - args = parser.parse_args() - - db_path = 'compile_commands.json' - - if args.build_path is not None: - build_path = args.build_path - else: - # Find our database - build_path = find_compilation_database(db_path) - - try: - invocation = [args.clang_tidy_binary, '-list-checks'] - invocation.append('-p=' + build_path) - if args.checks: - invocation.append('-checks=' + args.checks) - invocation.append('-') - if args.quiet: - # Even with -quiet we still want to check if we can call clang-tidy. - with open(os.devnull, 'w') as dev_null: - subprocess.check_call(invocation, stdout=dev_null) - else: - subprocess.check_call(invocation) - except: - print("Unable to run clang-tidy.", file=sys.stderr) - sys.exit(1) - - # Load the database and extract all files. - database = json.load(open(os.path.join(build_path, db_path))) - files = [make_absolute(entry['file'], entry['directory']) - for entry in database] - - max_task = args.j - if max_task == 0: - max_task = multiprocessing.cpu_count() - - tmpdir = None - if args.fix or (yaml and args.export_fixes): - check_clang_apply_replacements_binary(args) - tmpdir = tempfile.mkdtemp() - - # Build up a big regexy filter from all command line arguments. - file_name_re = re.compile('|'.join(args.files)) - - return_code = 0 - try: - # Spin up a bunch of tidy-launching threads. - task_queue = queue.Queue(max_task) - # List of files with a non-zero return code. - failed_files = [] - lock = threading.Lock() - for _ in range(max_task): - t = threading.Thread(target=run_tidy, - args=(args, tmpdir, build_path, task_queue, lock, failed_files)) - t.daemon = True - t.start() - - # Fill the queue with files. 
- for name in files: - if file_name_re.search(name): - task_queue.put(name) - - # Wait for all threads to be done. - task_queue.join() - if len(failed_files): - return_code = 1 - - except KeyboardInterrupt: - # This is a sad hack. Unfortunately subprocess goes - # bonkers with ctrl-c and we start forking merrily. - print('\nCtrl-C detected, goodbye.') - if tmpdir: - shutil.rmtree(tmpdir) - os.kill(0, 9) - - if yaml and args.export_fixes: - print('Writing fixes to ' + args.export_fixes + ' ...') - try: - merge_replacement_files(tmpdir, args.export_fixes) - except: - print('Error exporting fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code=1 - - if args.fix: - print('Applying fixes ...') - try: - apply_fixes(args, tmpdir) - except: - print('Error applying fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code=1 - - if tmpdir: - shutil.rmtree(tmpdir) - sys.exit(return_code) - -if __name__ == '__main__': - main() From 3c1013f66d73fa76c27ce264a3f9e55a01406fa6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 10:45:38 -0500 Subject: [PATCH 06/59] git-hooks: Need to do more research later... 
--- XML_Parse.h | 6 +++--- cacti/nuca.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/XML_Parse.h b/XML_Parse.h index 9ae752e..937b380 100644 --- a/XML_Parse.h +++ b/XML_Parse.h @@ -302,7 +302,7 @@ typedef struct { double Dir_config[20]; int buffer_sizes[20]; int clockrate; - int ports[20]; + int ports[21]; int device_type; int cache_policy; // 0 no write or write-though with non-write allocate;1 // write-back with write-allocate @@ -324,7 +324,7 @@ typedef struct { double Dir_config[20]; int buffer_sizes[20]; int clockrate; - int ports[20]; + int ports[21]; int device_type; int cache_policy; // 0 no write or write-though with non-write allocate;1 // write-back with write-allocate @@ -434,7 +434,7 @@ typedef struct { int number_of_outputs_of_crossbars; int flit_bits; int input_buffer_entries_per_port; - int ports_of_input_buffer[20]; + int ports_of_input_buffer[21]; // stats double crossbar_accesses; } xbar0_systemNoC; diff --git a/cacti/nuca.cc b/cacti/nuca.cc index 94dfa76..c9fbcd6 100644 --- a/cacti/nuca.cc +++ b/cacti/nuca.cc @@ -41,7 +41,7 @@ unsigned int MIN_BANKSIZE = 65536; #define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ #define CONTR_2_BANK_LAT 0 -int cont_stats[2 /*l2 or l3*/][5 /* cores */][ROUTER_TYPES][7 /*banks*/] +int cont_stats[2 /*l2 or l3*/][5 /* cores */][ROUTER_TYPES][8 /*banks*/] [8 /* cycle time */]; Nuca::Nuca(TechnologyParameter::DeviceType *dt) : deviceType(dt) { From 40ea8dec3bea15451d50d6afb651c7b09c4834a5 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 12:15:01 -0500 Subject: [PATCH 07/59] organization: src directory grepping the sources is now easier CMakeLists copies the binary to the build directory post build. 
--- .gitignore | 4 ++ CMakeLists.txt | 39 ++---------------- src/CMakeLists.txt | 41 +++++++++++++++++++ XML_Parse.cc => src/XML_Parse.cc | 0 XML_Parse.h => src/XML_Parse.h | 0 arch_const.h => src/arch_const.h | 0 array.cc => src/array.cc | 0 array.h => src/array.h | 0 .../basic_components.cc | 0 basic_components.h => src/basic_components.h | 0 {cacti => src/cacti}/CMakeLists.txt | 0 {cacti => src/cacti}/README | 0 {cacti => src/cacti}/Ucache.cc | 26 +++++++++++- {cacti => src/cacti}/Ucache.h | 0 {cacti => src/cacti}/arbiter.cc | 0 {cacti => src/cacti}/arbiter.h | 0 {cacti => src/cacti}/area.cc | 0 {cacti => src/cacti}/area.h | 0 {cacti => src/cacti}/bank.cc | 0 {cacti => src/cacti}/bank.h | 0 {cacti => src/cacti}/basic_circuit.cc | 0 {cacti => src/cacti}/basic_circuit.h | 0 {cacti => src/cacti}/cache.cfg | 0 {cacti => src/cacti}/cacti_interface.cc | 0 {cacti => src/cacti}/cacti_interface.h | 0 {cacti => src/cacti}/component.cc | 0 {cacti => src/cacti}/component.h | 0 {cacti => src/cacti}/const.h | 0 {cacti => src/cacti}/crossbar.cc | 0 {cacti => src/cacti}/crossbar.h | 0 {cacti => src/cacti}/decoder.cc | 0 {cacti => src/cacti}/decoder.h | 0 {cacti => src/cacti}/htree2.cc | 0 {cacti => src/cacti}/htree2.h | 0 {cacti => src/cacti}/io.cc | 0 {cacti => src/cacti}/io.h | 0 {cacti => src/cacti}/main.cc | 0 {cacti => src/cacti}/mat.cc | 0 {cacti => src/cacti}/mat.h | 0 {cacti => src/cacti}/nuca.cc | 0 {cacti => src/cacti}/nuca.h | 0 {cacti => src/cacti}/parameter.cc | 0 {cacti => src/cacti}/parameter.h | 0 {cacti => src/cacti}/powergating.cc | 0 {cacti => src/cacti}/powergating.h | 0 {cacti => src/cacti}/router.cc | 0 {cacti => src/cacti}/router.h | 0 {cacti => src/cacti}/subarray.cc | 0 {cacti => src/cacti}/subarray.h | 0 {cacti => src/cacti}/technology.cc | 0 {cacti => src/cacti}/uca.cc | 0 {cacti => src/cacti}/uca.h | 0 {cacti => src/cacti}/version_cacti.h | 0 {cacti => src/cacti}/wire.cc | 0 {cacti => src/cacti}/wire.h | 0 core.cc => src/core.cc | 0 core.h => 
src/core.h | 0 globalvar.h => src/globalvar.h | 0 interconnect.cc => src/interconnect.cc | 0 interconnect.h => src/interconnect.h | 0 iocontrollers.cc => src/iocontrollers.cc | 0 iocontrollers.h => src/iocontrollers.h | 0 logic.cc => src/logic.cc | 0 logic.h => src/logic.h | 0 main.cc => src/main.cc | 0 memoryctrl.cc => src/memoryctrl.cc | 0 memoryctrl.h => src/memoryctrl.h | 0 noc.cc => src/noc.cc | 0 noc.h => src/noc.h | 0 processor.cc => src/processor.cc | 0 processor.h => src/processor.h | 0 sharedcache.cc => src/sharedcache.cc | 0 sharedcache.h => src/sharedcache.h | 0 version.h => src/version.h | 0 xmlParser.cc => src/xmlParser.cc | 0 xmlParser.h => src/xmlParser.h | 0 76 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 src/CMakeLists.txt rename XML_Parse.cc => src/XML_Parse.cc (100%) rename XML_Parse.h => src/XML_Parse.h (100%) rename arch_const.h => src/arch_const.h (100%) rename array.cc => src/array.cc (100%) rename array.h => src/array.h (100%) rename basic_components.cc => src/basic_components.cc (100%) rename basic_components.h => src/basic_components.h (100%) rename {cacti => src/cacti}/CMakeLists.txt (100%) rename {cacti => src/cacti}/README (100%) rename {cacti => src/cacti}/Ucache.cc (98%) rename {cacti => src/cacti}/Ucache.h (100%) rename {cacti => src/cacti}/arbiter.cc (100%) rename {cacti => src/cacti}/arbiter.h (100%) rename {cacti => src/cacti}/area.cc (100%) rename {cacti => src/cacti}/area.h (100%) rename {cacti => src/cacti}/bank.cc (100%) rename {cacti => src/cacti}/bank.h (100%) rename {cacti => src/cacti}/basic_circuit.cc (100%) rename {cacti => src/cacti}/basic_circuit.h (100%) rename {cacti => src/cacti}/cache.cfg (100%) rename {cacti => src/cacti}/cacti_interface.cc (100%) rename {cacti => src/cacti}/cacti_interface.h (100%) rename {cacti => src/cacti}/component.cc (100%) rename {cacti => src/cacti}/component.h (100%) rename {cacti => src/cacti}/const.h (100%) rename {cacti => src/cacti}/crossbar.cc (100%) rename 
{cacti => src/cacti}/crossbar.h (100%) rename {cacti => src/cacti}/decoder.cc (100%) rename {cacti => src/cacti}/decoder.h (100%) rename {cacti => src/cacti}/htree2.cc (100%) rename {cacti => src/cacti}/htree2.h (100%) rename {cacti => src/cacti}/io.cc (100%) rename {cacti => src/cacti}/io.h (100%) rename {cacti => src/cacti}/main.cc (100%) rename {cacti => src/cacti}/mat.cc (100%) rename {cacti => src/cacti}/mat.h (100%) rename {cacti => src/cacti}/nuca.cc (100%) rename {cacti => src/cacti}/nuca.h (100%) rename {cacti => src/cacti}/parameter.cc (100%) rename {cacti => src/cacti}/parameter.h (100%) rename {cacti => src/cacti}/powergating.cc (100%) rename {cacti => src/cacti}/powergating.h (100%) rename {cacti => src/cacti}/router.cc (100%) rename {cacti => src/cacti}/router.h (100%) rename {cacti => src/cacti}/subarray.cc (100%) rename {cacti => src/cacti}/subarray.h (100%) rename {cacti => src/cacti}/technology.cc (100%) rename {cacti => src/cacti}/uca.cc (100%) rename {cacti => src/cacti}/uca.h (100%) rename {cacti => src/cacti}/version_cacti.h (100%) rename {cacti => src/cacti}/wire.cc (100%) rename {cacti => src/cacti}/wire.h (100%) rename core.cc => src/core.cc (100%) rename core.h => src/core.h (100%) rename globalvar.h => src/globalvar.h (100%) rename interconnect.cc => src/interconnect.cc (100%) rename interconnect.h => src/interconnect.h (100%) rename iocontrollers.cc => src/iocontrollers.cc (100%) rename iocontrollers.h => src/iocontrollers.h (100%) rename logic.cc => src/logic.cc (100%) rename logic.h => src/logic.h (100%) rename main.cc => src/main.cc (100%) rename memoryctrl.cc => src/memoryctrl.cc (100%) rename memoryctrl.h => src/memoryctrl.h (100%) rename noc.cc => src/noc.cc (100%) rename noc.h => src/noc.h (100%) rename processor.cc => src/processor.cc (100%) rename processor.h => src/processor.h (100%) rename sharedcache.cc => src/sharedcache.cc (100%) rename sharedcache.h => src/sharedcache.h (100%) rename version.h => src/version.h (100%) 
rename xmlParser.cc => src/xmlParser.cc (100%) rename xmlParser.h => src/xmlParser.h (100%) diff --git a/.gitignore b/.gitignore index af60277..0775c39 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,7 @@ unit_test/output # CMake build* + +# Profiling +gmon.out +profile*.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index e0a14fc..0889e15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,44 +18,11 @@ endif() set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function") set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0") set(CMAKE_CXX_FLAGS_RELEASE "-O3") +set(CMAKE_CXX_FLAGS_PROFILE "-O3 -pg -g") set(CMAKE_C_COMPILER clang) set(CMAKE_CXX_COMPILER clang++) add_definitions(-DNTHREADS=1) -add_subdirectory(cacti) - -add_executable(mcpat - XML_Parse.h - arch_const.h - array.h - basic_components.h - core.h - globalvar.h - interconnect.h - iocontrollers.h - logic.h - memoryctrl.h - noc.h - processor.h - sharedcache.h - version.h - xmlParser.h - XML_Parse.cc - array.cc - basic_components.cc - core.cc - interconnect.cc - iocontrollers.cc - logic.cc - main.cc - memoryctrl.cc - noc.cc - processor.cc - sharedcache.cc - xmlParser.cc -) - -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) -target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads) +add_subdirectory(src) + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..e19cbf5 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,41 @@ +add_subdirectory(cacti) + +add_executable(mcpat + XML_Parse.h + arch_const.h + array.h + basic_components.h + core.h + globalvar.h + interconnect.h + iocontrollers.h + logic.h + memoryctrl.h + noc.h + processor.h + sharedcache.h + version.h + xmlParser.h + XML_Parse.cc + array.cc + basic_components.cc + core.cc + interconnect.cc + iocontrollers.cc + logic.cc + main.cc + memoryctrl.cc + noc.cc + processor.cc + sharedcache.cc + xmlParser.cc +) + +set(THREADS_PREFER_PTHREAD_FLAG ON) 
+find_package(Threads REQUIRED) +target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads) + +add_custom_command(TARGET mcpat POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ ${PROJECT_BINARY_DIR}/mcpat + COMMENT "Copying executable ${PROJECT_BINARY_DIR}/mcpat" + ) diff --git a/XML_Parse.cc b/src/XML_Parse.cc similarity index 100% rename from XML_Parse.cc rename to src/XML_Parse.cc diff --git a/XML_Parse.h b/src/XML_Parse.h similarity index 100% rename from XML_Parse.h rename to src/XML_Parse.h diff --git a/arch_const.h b/src/arch_const.h similarity index 100% rename from arch_const.h rename to src/arch_const.h diff --git a/array.cc b/src/array.cc similarity index 100% rename from array.cc rename to src/array.cc diff --git a/array.h b/src/array.h similarity index 100% rename from array.h rename to src/array.h diff --git a/basic_components.cc b/src/basic_components.cc similarity index 100% rename from basic_components.cc rename to src/basic_components.cc diff --git a/basic_components.h b/src/basic_components.h similarity index 100% rename from basic_components.h rename to src/basic_components.h diff --git a/cacti/CMakeLists.txt b/src/cacti/CMakeLists.txt similarity index 100% rename from cacti/CMakeLists.txt rename to src/cacti/CMakeLists.txt diff --git a/cacti/README b/src/cacti/README similarity index 100% rename from cacti/README rename to src/cacti/README diff --git a/cacti/Ucache.cc b/src/cacti/Ucache.cc similarity index 98% rename from cacti/Ucache.cc rename to src/cacti/Ucache.cc index 5b844a9..520f779 100644 --- a/cacti/Ucache.cc +++ b/src/cacti/Ucache.cc @@ -215,8 +215,7 @@ void *calc_time_mt_wrapper(void *void_obj) { delete tag_arr.back(); data_arr.pop_back(); tag_arr.pop_back(); - - pthread_exit(NULL); + return NULL; } bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, @@ -768,6 +767,8 @@ void solve(uca_org_t *fin_res) { ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, 
is_tag); +#if NTHREADS > 1 + // If Multithreadded: for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = false; @@ -779,6 +780,13 @@ void solve(uca_org_t *fin_res) { for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } +#else + // Else just a single thread dont bother with pthread overhead + calc_array[0].is_tag = is_tag; + calc_array[0].is_main_mem = false; + calc_array[0].Nspd_min = 0.125; + calc_time_mt_wrapper((void *)(&(calc_array[0]))); +#endif for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].data_arr.sort(mem_array::lt); @@ -799,6 +807,7 @@ void solve(uca_org_t *fin_res) { ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); +#if NTHREADS > 1 for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = g_ip->is_main_mem; @@ -816,6 +825,19 @@ void solve(uca_org_t *fin_res) { for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } +#else + // Else just a single thread dont bother with pthread overhead + calc_array[0].is_tag = is_tag; + calc_array[0].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam || g_ip->fully_assoc)) { + calc_array[0].Nspd_min = + (double)(g_ip->out_w) / (double)(g_ip->block_sz * 8); + } else { + calc_array[0].Nspd_min = 1; + } + calc_time_mt_wrapper((void *)(&(calc_array[0]))); +#endif + data_arr.clear(); for (uint32_t t = 0; t < nthreads; t++) { diff --git a/cacti/Ucache.h b/src/cacti/Ucache.h similarity index 100% rename from cacti/Ucache.h rename to src/cacti/Ucache.h diff --git a/cacti/arbiter.cc b/src/cacti/arbiter.cc similarity index 100% rename from cacti/arbiter.cc rename to src/cacti/arbiter.cc diff --git a/cacti/arbiter.h b/src/cacti/arbiter.h similarity index 100% rename from cacti/arbiter.h rename to src/cacti/arbiter.h diff --git a/cacti/area.cc b/src/cacti/area.cc similarity index 100% rename from cacti/area.cc rename to 
src/cacti/area.cc diff --git a/cacti/area.h b/src/cacti/area.h similarity index 100% rename from cacti/area.h rename to src/cacti/area.h diff --git a/cacti/bank.cc b/src/cacti/bank.cc similarity index 100% rename from cacti/bank.cc rename to src/cacti/bank.cc diff --git a/cacti/bank.h b/src/cacti/bank.h similarity index 100% rename from cacti/bank.h rename to src/cacti/bank.h diff --git a/cacti/basic_circuit.cc b/src/cacti/basic_circuit.cc similarity index 100% rename from cacti/basic_circuit.cc rename to src/cacti/basic_circuit.cc diff --git a/cacti/basic_circuit.h b/src/cacti/basic_circuit.h similarity index 100% rename from cacti/basic_circuit.h rename to src/cacti/basic_circuit.h diff --git a/cacti/cache.cfg b/src/cacti/cache.cfg similarity index 100% rename from cacti/cache.cfg rename to src/cacti/cache.cfg diff --git a/cacti/cacti_interface.cc b/src/cacti/cacti_interface.cc similarity index 100% rename from cacti/cacti_interface.cc rename to src/cacti/cacti_interface.cc diff --git a/cacti/cacti_interface.h b/src/cacti/cacti_interface.h similarity index 100% rename from cacti/cacti_interface.h rename to src/cacti/cacti_interface.h diff --git a/cacti/component.cc b/src/cacti/component.cc similarity index 100% rename from cacti/component.cc rename to src/cacti/component.cc diff --git a/cacti/component.h b/src/cacti/component.h similarity index 100% rename from cacti/component.h rename to src/cacti/component.h diff --git a/cacti/const.h b/src/cacti/const.h similarity index 100% rename from cacti/const.h rename to src/cacti/const.h diff --git a/cacti/crossbar.cc b/src/cacti/crossbar.cc similarity index 100% rename from cacti/crossbar.cc rename to src/cacti/crossbar.cc diff --git a/cacti/crossbar.h b/src/cacti/crossbar.h similarity index 100% rename from cacti/crossbar.h rename to src/cacti/crossbar.h diff --git a/cacti/decoder.cc b/src/cacti/decoder.cc similarity index 100% rename from cacti/decoder.cc rename to src/cacti/decoder.cc diff --git a/cacti/decoder.h 
b/src/cacti/decoder.h similarity index 100% rename from cacti/decoder.h rename to src/cacti/decoder.h diff --git a/cacti/htree2.cc b/src/cacti/htree2.cc similarity index 100% rename from cacti/htree2.cc rename to src/cacti/htree2.cc diff --git a/cacti/htree2.h b/src/cacti/htree2.h similarity index 100% rename from cacti/htree2.h rename to src/cacti/htree2.h diff --git a/cacti/io.cc b/src/cacti/io.cc similarity index 100% rename from cacti/io.cc rename to src/cacti/io.cc diff --git a/cacti/io.h b/src/cacti/io.h similarity index 100% rename from cacti/io.h rename to src/cacti/io.h diff --git a/cacti/main.cc b/src/cacti/main.cc similarity index 100% rename from cacti/main.cc rename to src/cacti/main.cc diff --git a/cacti/mat.cc b/src/cacti/mat.cc similarity index 100% rename from cacti/mat.cc rename to src/cacti/mat.cc diff --git a/cacti/mat.h b/src/cacti/mat.h similarity index 100% rename from cacti/mat.h rename to src/cacti/mat.h diff --git a/cacti/nuca.cc b/src/cacti/nuca.cc similarity index 100% rename from cacti/nuca.cc rename to src/cacti/nuca.cc diff --git a/cacti/nuca.h b/src/cacti/nuca.h similarity index 100% rename from cacti/nuca.h rename to src/cacti/nuca.h diff --git a/cacti/parameter.cc b/src/cacti/parameter.cc similarity index 100% rename from cacti/parameter.cc rename to src/cacti/parameter.cc diff --git a/cacti/parameter.h b/src/cacti/parameter.h similarity index 100% rename from cacti/parameter.h rename to src/cacti/parameter.h diff --git a/cacti/powergating.cc b/src/cacti/powergating.cc similarity index 100% rename from cacti/powergating.cc rename to src/cacti/powergating.cc diff --git a/cacti/powergating.h b/src/cacti/powergating.h similarity index 100% rename from cacti/powergating.h rename to src/cacti/powergating.h diff --git a/cacti/router.cc b/src/cacti/router.cc similarity index 100% rename from cacti/router.cc rename to src/cacti/router.cc diff --git a/cacti/router.h b/src/cacti/router.h similarity index 100% rename from cacti/router.h 
rename to src/cacti/router.h diff --git a/cacti/subarray.cc b/src/cacti/subarray.cc similarity index 100% rename from cacti/subarray.cc rename to src/cacti/subarray.cc diff --git a/cacti/subarray.h b/src/cacti/subarray.h similarity index 100% rename from cacti/subarray.h rename to src/cacti/subarray.h diff --git a/cacti/technology.cc b/src/cacti/technology.cc similarity index 100% rename from cacti/technology.cc rename to src/cacti/technology.cc diff --git a/cacti/uca.cc b/src/cacti/uca.cc similarity index 100% rename from cacti/uca.cc rename to src/cacti/uca.cc diff --git a/cacti/uca.h b/src/cacti/uca.h similarity index 100% rename from cacti/uca.h rename to src/cacti/uca.h diff --git a/cacti/version_cacti.h b/src/cacti/version_cacti.h similarity index 100% rename from cacti/version_cacti.h rename to src/cacti/version_cacti.h diff --git a/cacti/wire.cc b/src/cacti/wire.cc similarity index 100% rename from cacti/wire.cc rename to src/cacti/wire.cc diff --git a/cacti/wire.h b/src/cacti/wire.h similarity index 100% rename from cacti/wire.h rename to src/cacti/wire.h diff --git a/core.cc b/src/core.cc similarity index 100% rename from core.cc rename to src/core.cc diff --git a/core.h b/src/core.h similarity index 100% rename from core.h rename to src/core.h diff --git a/globalvar.h b/src/globalvar.h similarity index 100% rename from globalvar.h rename to src/globalvar.h diff --git a/interconnect.cc b/src/interconnect.cc similarity index 100% rename from interconnect.cc rename to src/interconnect.cc diff --git a/interconnect.h b/src/interconnect.h similarity index 100% rename from interconnect.h rename to src/interconnect.h diff --git a/iocontrollers.cc b/src/iocontrollers.cc similarity index 100% rename from iocontrollers.cc rename to src/iocontrollers.cc diff --git a/iocontrollers.h b/src/iocontrollers.h similarity index 100% rename from iocontrollers.h rename to src/iocontrollers.h diff --git a/logic.cc b/src/logic.cc similarity index 100% rename from logic.cc 
rename to src/logic.cc diff --git a/logic.h b/src/logic.h similarity index 100% rename from logic.h rename to src/logic.h diff --git a/main.cc b/src/main.cc similarity index 100% rename from main.cc rename to src/main.cc diff --git a/memoryctrl.cc b/src/memoryctrl.cc similarity index 100% rename from memoryctrl.cc rename to src/memoryctrl.cc diff --git a/memoryctrl.h b/src/memoryctrl.h similarity index 100% rename from memoryctrl.h rename to src/memoryctrl.h diff --git a/noc.cc b/src/noc.cc similarity index 100% rename from noc.cc rename to src/noc.cc diff --git a/noc.h b/src/noc.h similarity index 100% rename from noc.h rename to src/noc.h diff --git a/processor.cc b/src/processor.cc similarity index 100% rename from processor.cc rename to src/processor.cc diff --git a/processor.h b/src/processor.h similarity index 100% rename from processor.h rename to src/processor.h diff --git a/sharedcache.cc b/src/sharedcache.cc similarity index 100% rename from sharedcache.cc rename to src/sharedcache.cc diff --git a/sharedcache.h b/src/sharedcache.h similarity index 100% rename from sharedcache.h rename to src/sharedcache.h diff --git a/version.h b/src/version.h similarity index 100% rename from version.h rename to src/version.h diff --git a/xmlParser.cc b/src/xmlParser.cc similarity index 100% rename from xmlParser.cc rename to src/xmlParser.cc diff --git a/xmlParser.h b/src/xmlParser.h similarity index 100% rename from xmlParser.h rename to src/xmlParser.h From 3a3a4018da79dc6d6ce0d46ee78e1cfe51f0b10c Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 14:04:45 -0500 Subject: [PATCH 08/59] Options: Options class with libboost program_options Adding Options class for easier extendability in the future based on boosts program_options library. 
Requires libboost 1.32.0+ --- .clang-format | 4 +- CMakeLists.txt | 8 + src/CMakeLists.txt | 34 +- src/XML_Parse.cc | 6 +- src/XML_Parse.h | 2 +- src/array.cc | 12 +- src/array.h | 9 +- src/cacti/Ucache.cc | 242 ++++++--- src/cacti/Ucache.h | 27 +- src/cacti/arbiter.cc | 20 +- src/cacti/arbiter.h | 4 +- src/cacti/bank.cc | 123 +++-- src/cacti/basic_circuit.cc | 146 ++++-- src/cacti/basic_circuit.h | 154 ++++-- src/cacti/cacti_interface.h | 114 +++-- src/cacti/component.cc | 24 +- src/cacti/component.h | 20 +- src/cacti/crossbar.cc | 63 ++- src/cacti/crossbar.h | 4 +- src/cacti/decoder.cc | 248 +++++++--- src/cacti/decoder.h | 28 +- src/cacti/htree2.cc | 146 +++--- src/cacti/htree2.h | 15 +- src/cacti/io.cc | 178 +++++-- src/cacti/main.cc | 135 +++-- src/cacti/mat.cc | 394 ++++++++++----- src/cacti/mat.h | 3 +- src/cacti/nuca.cc | 27 +- src/cacti/parameter.cc | 13 +- src/cacti/parameter.h | 12 +- src/cacti/powergating.cc | 15 +- src/cacti/powergating.h | 4 +- src/cacti/router.cc | 10 +- src/cacti/router.h | 8 +- src/cacti/subarray.cc | 9 +- src/cacti/technology.cc | 961 ++++++++++++++++++++++++------------ src/cacti/uca.cc | 128 +++-- src/cacti/wire.cc | 97 +++- src/cacti/wire.h | 9 +- src/core.cc | 783 ++++++++++++++++++++--------- src/core.h | 52 +- src/interconnect.cc | 20 +- src/interconnect.h | 17 +- src/logic.cc | 203 +++++--- src/logic.h | 42 +- src/main.cc | 47 +- src/memoryctrl.cc | 44 +- src/memoryctrl.h | 15 +- src/noc.cc | 57 ++- src/noc.h | 10 +- src/options.cc | 86 ++++ src/options.h | 66 +++ src/processor.cc | 126 +++-- src/sharedcache.cc | 15 +- src/sharedcache.h | 6 +- src/xmlParser.cc | 199 +++++--- src/xmlParser.h | 75 +-- unit_test/unit_test.py | 7 +- util/format.sh | 2 +- 59 files changed, 3692 insertions(+), 1636 deletions(-) create mode 100644 src/options.cc create mode 100644 src/options.h diff --git a/.clang-format b/.clang-format index 08fd74f..aacac66 100644 --- a/.clang-format +++ b/.clang-format @@ -22,8 +22,8 @@ 
AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: MultiLine -BinPackArguments: true -BinPackParameters: true +BinPackArguments: false +BinPackParameters: false BraceWrapping: AfterCaseLabel: false AfterClass: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 0889e15..bf16735 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,14 @@ cmake_minimum_required (VERSION 3.1) project(mcpat DESCRIPTION "Power Timing Area Calculator" LANGUAGES CXX) +find_package(Boost 1.56 REQUIRED COMPONENTS + program_options) + +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(Threads REQUIRED) + + set (MCPAT_VERSION_MAJOR 1) set (MCPAT_VERSION_MINOR 3) set (MCPAT_VERSION_PATCH 0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e19cbf5..69474b4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,39 +1,39 @@ add_subdirectory(cacti) add_executable(mcpat - XML_Parse.h arch_const.h array.h + array.cc basic_components.h + basic_components.cc core.h + core.cc globalvar.h interconnect.h - iocontrollers.h - logic.h - memoryctrl.h - noc.h - processor.h - sharedcache.h - version.h - xmlParser.h - XML_Parse.cc - array.cc - basic_components.cc - core.cc interconnect.cc + iocontrollers.h iocontrollers.cc + logic.h logic.cc + noc.h + noc.cc main.cc + memoryctrl.h memoryctrl.cc - noc.cc + options.h + options.cc + processor.h processor.cc + sharedcache.h sharedcache.cc + version.h + xmlParser.h xmlParser.cc + XML_Parse.h + XML_Parse.cc ) -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) -target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads) +target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads Boost::program_options) add_custom_command(TARGET mcpat POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ ${PROJECT_BINARY_DIR}/mcpat diff --git a/src/XML_Parse.cc b/src/XML_Parse.cc index 71796b4..86c8ad1 100644 --- a/src/XML_Parse.cc +++ 
b/src/XML_Parse.cc @@ -39,16 +39,18 @@ using namespace std; -void ParseXML::parse(char *filepath) { +void ParseXML::parse(std::string filepath) { unsigned int i, j, k, m, n; unsigned int NumofCom_4; unsigned int itmp; + char *fp = new char[filepath.length() + 1]; + strcpy(fp, filepath.c_str()); // Initialize all structures ParseXML::initialize(); // this open and parse the XML file: XMLNode xMainNode = XMLNode::openFileHelper( - filepath, "component"); // the 'component' in the first layer + fp, "component"); // the 'component' in the first layer XMLNode xNode2 = xMainNode.getChildNode( "component"); // the 'component' in the second layer diff --git a/src/XML_Parse.h b/src/XML_Parse.h index 937b380..05197bc 100644 --- a/src/XML_Parse.h +++ b/src/XML_Parse.h @@ -606,7 +606,7 @@ typedef struct { class ParseXML { public: - void parse(char *filepath); + void parse(std::string filepath); void initialize(); public: diff --git a/src/array.cc b/src/array.cc index c12662c..55cb2e1 100644 --- a/src/array.cc +++ b/src/array.cc @@ -43,9 +43,12 @@ using namespace std; -ArrayST::ArrayST(const InputParameter *configure_interface, string _name, - enum Device_ty device_ty_, bool opt_local_, - enum Core_type core_ty_, bool _is_default) +ArrayST::ArrayST(const InputParameter *configure_interface, + string _name, + enum Device_ty device_ty_, + bool opt_local_, + enum Core_type core_ty_, + bool _is_default) : l_ip(*configure_interface), name(_name), device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_), is_default(_is_default) { @@ -221,7 +224,8 @@ void ArrayST::optimize_array() { if (candidate_solutions.empty() == false) { local_result.valid = true; for (candidate_iter = candidate_solutions.begin(); - candidate_iter != candidate_solutions.end(); ++candidate_iter) + candidate_iter != candidate_solutions.end(); + ++candidate_iter) { if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic) { diff --git a/src/array.h b/src/array.h index 6151ea4..4431f01 100644 --- 
a/src/array.h +++ b/src/array.h @@ -46,9 +46,12 @@ using namespace std; class ArrayST : public Component { public: ArrayST(){}; - ArrayST(const InputParameter *configure_interface, string _name, - enum Device_ty device_ty_, bool opt_local_ = true, - enum Core_type core_ty_ = Inorder, bool _is_default = true); + ArrayST(const InputParameter *configure_interface, + string _name, + enum Device_ty device_ty_, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, + bool _is_default = true); InputParameter l_ip; string name; diff --git a/src/cacti/Ucache.cc b/src/cacti/Ucache.cc index 520f779..3cb93cd 100644 --- a/src/cacti/Ucache.cc +++ b/src/cacti/Ucache.cc @@ -166,19 +166,39 @@ void *calc_time_mt_wrapper(void *void_obj) { } if (is_tag == true) { - is_valid_partition = - calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, - Ndcm, Ndsam_lev_1, Ndsam_lev_2, tag_arr.back(), - 0, NULL, NULL, is_main_mem); + is_valid_partition = calculate_time(is_tag, + pure_ram, + pure_cam, + Nspd, + Ndwl, + Ndbl, + Ndcm, + Ndsam_lev_1, + Ndsam_lev_2, + tag_arr.back(), + 0, + NULL, + NULL, + is_main_mem); } // If it's a fully-associative cache, the data array partition // parameters are identical to that of the tag array, so compute // data array partition properties also here. 
if (is_tag == false || g_ip->fully_assoc) { - is_valid_partition = - calculate_time(is_tag /*false*/, pure_ram, pure_cam, Nspd, - Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - data_arr.back(), 0, NULL, NULL, is_main_mem); + is_valid_partition = calculate_time(is_tag /*false*/, + pure_ram, + pure_cam, + Nspd, + Ndwl, + Ndbl, + Ndcm, + Ndsam_lev_1, + Ndsam_lev_2, + data_arr.back(), + 0, + NULL, + NULL, + is_main_mem); } if (is_valid_partition) { @@ -218,14 +238,30 @@ void *calc_time_mt_wrapper(void *void_obj) { return NULL; } -bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, - unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, - unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, - mem_array *ptr_array, int flag_results_populate, - results_mem_array *ptr_results, uca_org_t *ptr_fin_res, +bool calculate_time(bool is_tag, + int pure_ram, + bool pure_cam, + double Nspd, + unsigned int Ndwl, + unsigned int Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + mem_array *ptr_array, + int flag_results_populate, + results_mem_array *ptr_results, + uca_org_t *ptr_fin_res, bool is_main_mem) { - DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, - Ndsam_lev_1, Ndsam_lev_2, is_main_mem); + DynamicParameter dyn_p(is_tag, + pure_ram, + pure_cam, + Nspd, + Ndwl, + Ndbl, + Ndcm, + Ndsam_lev_1, + Ndsam_lev_2, + is_main_mem); if (dyn_p.is_valid == false) { return false; @@ -238,8 +274,15 @@ bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, // necessary variables } else { - collect_uca_results(Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, uca, - ptr_array, is_main_mem); + collect_uca_results(Nspd, + Ndwl, + Ndbl, + Ndcm, + Ndsam_lev_1, + Ndsam_lev_2, + uca, + ptr_array, + is_main_mem); } delete uca; @@ -250,8 +293,13 @@ void collect_uca_results( // bool is_tag, // int pure_ram, // bool pure_cam, - double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, - 
unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, UCA const *const uca, + double Nspd, + unsigned int Ndwl, + unsigned int Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + UCA const *const uca, mem_array *const ptr_array, // int flag_results_populate, // results_mem_array *ptr_results, @@ -569,7 +617,8 @@ bool check_mem_org(mem_array &u, const min_values_t *minval) { return true; } -void find_optimal_uca(uca_org_t *res, min_values_t *minval, +void find_optimal_uca(uca_org_t *res, + min_values_t *minval, list &ulist) { double cost = 0; double min_cost = BIGNUM; @@ -767,14 +816,14 @@ void solve(uca_org_t *fin_res) { ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); -#if NTHREADS > 1 +#if NTHREADS > 1 // If Multithreadded: for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = false; calc_array[t].Nspd_min = 0.125; - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, - (void *)(&(calc_array[t]))); + pthread_create( + &threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); } for (uint32_t t = 0; t < nthreads; t++) { @@ -807,7 +856,7 @@ void solve(uca_org_t *fin_res) { ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); -#if NTHREADS > 1 +#if NTHREADS > 1 for (uint32_t t = 0; t < nthreads; t++) { calc_array[t].is_tag = is_tag; calc_array[t].is_main_mem = g_ip->is_main_mem; @@ -818,26 +867,25 @@ void solve(uca_org_t *fin_res) { calc_array[t].Nspd_min = 1; } - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, - (void *)(&(calc_array[t]))); + pthread_create( + &threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); } for (uint32_t t = 0; t < nthreads; t++) { pthread_join(threads[t], NULL); } #else - // Else just a single thread dont bother with pthread overhead - calc_array[0].is_tag = is_tag; - calc_array[0].is_main_mem = 
g_ip->is_main_mem; - if (!(pure_cam || g_ip->fully_assoc)) { - calc_array[0].Nspd_min = - (double)(g_ip->out_w) / (double)(g_ip->block_sz * 8); - } else { - calc_array[0].Nspd_min = 1; - } - calc_time_mt_wrapper((void *)(&(calc_array[0]))); + // Else just a single thread dont bother with pthread overhead + calc_array[0].is_tag = is_tag; + calc_array[0].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam || g_ip->fully_assoc)) { + calc_array[0].Nspd_min = + (double)(g_ip->out_w) / (double)(g_ip->block_sz * 8); + } else { + calc_array[0].Nspd_min = 1; + } + calc_time_mt_wrapper((void *)(&(calc_array[0]))); #endif - data_arr.clear(); for (uint32_t t = 0; t < nthreads; t++) { @@ -964,41 +1012,55 @@ void update_dvs(uca_org_t *fin_res) { // Wire::print_wire(); if (fin_res->tag_array2) { - DynamicParameter tag_arr_dyn_p( - true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, - fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, - fin_res->tag_array2->deg_bl_muxing, - fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, - g_ip->is_main_mem); + DynamicParameter tag_arr_dyn_p(true, + g_ip->pure_ram, + g_ip->pure_cam, + fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, + fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + g_ip->is_main_mem); if (tag_arr_dyn_p.is_valid) { UCA *tag_arr = new UCA(tag_arr_dyn_p); fin_res->uca_q[i]->tag_array2 = new mem_array(); - collect_uca_results( - fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, - fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, - fin_res->tag_array2->Ndsam_lev_1, - fin_res->tag_array2->Ndsam_lev_2, tag_arr, - fin_res->uca_q[i]->tag_array2, g_ip->is_main_mem); + collect_uca_results(fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, + fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + tag_arr, + 
fin_res->uca_q[i]->tag_array2, + g_ip->is_main_mem); delete tag_arr; } } - DynamicParameter data_arr_dyn_p( - false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, - fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, - fin_res->data_array2->deg_bl_muxing, - fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, - g_ip->is_main_mem); + DynamicParameter data_arr_dyn_p(false, + g_ip->pure_ram, + g_ip->pure_cam, + fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + g_ip->is_main_mem); if (data_arr_dyn_p.is_valid) { UCA *data_arr = new UCA(data_arr_dyn_p); fin_res->uca_q[i]->data_array2 = new mem_array(); - collect_uca_results( - fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, - fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, - fin_res->data_array2->Ndsam_lev_1, - fin_res->data_array2->Ndsam_lev_2, data_arr, - fin_res->uca_q[i]->data_array2, g_ip->is_main_mem); + collect_uca_results(fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + data_arr, + fin_res->uca_q[i]->data_array2, + g_ip->is_main_mem); delete data_arr; } @@ -1052,38 +1114,56 @@ void update_pg(uca_org_t *fin_res) { //(1,1, false); Wire::print_wire(); if (fin_res->tag_array2) { // init_tech_params(g_ip->F_sz_um,true); - DynamicParameter tag_arr_dyn_p( - true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, - fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, - fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, - fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter tag_arr_dyn_p(true, + g_ip->pure_ram, + g_ip->pure_cam, + fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, 
+ fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + g_ip->is_main_mem); if (tag_arr_dyn_p.is_valid) { UCA *tag_arr = new UCA(tag_arr_dyn_p); fin_res->uca_pg_reference->tag_array2 = new mem_array(); - collect_uca_results( - fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, - fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, - fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, - tag_arr, fin_res->uca_pg_reference->tag_array2, g_ip->is_main_mem); + collect_uca_results(fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, + fin_res->tag_array2->deg_bl_muxing, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + tag_arr, + fin_res->uca_pg_reference->tag_array2, + g_ip->is_main_mem); delete tag_arr; } } // init_tech_params(g_ip->F_sz_um,false); - DynamicParameter data_arr_dyn_p( - false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, - fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, - fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, - fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter data_arr_dyn_p(false, + g_ip->pure_ram, + g_ip->pure_cam, + fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + g_ip->is_main_mem); if (data_arr_dyn_p.is_valid) { UCA *data_arr = new UCA(data_arr_dyn_p); fin_res->uca_pg_reference->data_array2 = new mem_array(); - collect_uca_results( - fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, - fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, - fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, - data_arr, fin_res->uca_pg_reference->data_array2, g_ip->is_main_mem); + collect_uca_results(fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + 
fin_res->data_array2->Ndbl, + fin_res->data_array2->deg_bl_muxing, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + data_arr, + fin_res->uca_pg_reference->data_array2, + g_ip->is_main_mem); delete data_arr; } diff --git a/src/cacti/Ucache.h b/src/cacti/Ucache.h index 216bbe6..794a619 100644 --- a/src/cacti/Ucache.h +++ b/src/cacti/Ucache.h @@ -69,19 +69,32 @@ struct solution { powerDef total_power; }; -bool calculate_time(bool is_tag, int pure_ram, bool pure_cam, double Nspd, - unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, - unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, - mem_array *ptr_array, int flag_results_populate, - results_mem_array *ptr_results, uca_org_t *ptr_fin_res, +bool calculate_time(bool is_tag, + int pure_ram, + bool pure_cam, + double Nspd, + unsigned int Ndwl, + unsigned int Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + mem_array *ptr_array, + int flag_results_populate, + results_mem_array *ptr_results, + uca_org_t *ptr_fin_res, bool is_main_mem); void collect_uca_results( // bool is_tag, // int pure_ram, // bool pure_cam, - double Nspd, unsigned int Ndwl, unsigned int Ndbl, unsigned int Ndcm, - unsigned int Ndsam_lev_1, unsigned int Ndsam_lev_2, UCA const *const uca, + double Nspd, + unsigned int Ndwl, + unsigned int Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + UCA const *const uca, mem_array *const ptr_array, // int flag_results_populate, // results_mem_array *ptr_results, diff --git a/src/cacti/arbiter.cc b/src/cacti/arbiter.cc index 1b0ff93..51b5dd9 100644 --- a/src/cacti/arbiter.cc +++ b/src/cacti/arbiter.cc @@ -31,7 +31,9 @@ #include "arbiter.h" -Arbiter::Arbiter(double n_req, double flit_size_, double output_len, +Arbiter::Arbiter(double n_req, + double flit_size_, + double output_len, TechnologyParameter::DeviceType *dt) : R(n_req), flit_size(flit_size_), o_len(output_len), deviceType(dt) { min_w_pmos = 
deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; @@ -81,16 +83,16 @@ void Arbiter::compute_power() { power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() * Vdd * Vdd / 2 + arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd * Vdd); - double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2, - min_w_pmos * PTn1 * 2, 2, nor); - double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R, - min_w_pmos * PTn2 * R, 2, nor); + double nor1_leak = cmos_Isub_leakage( + g_tp.min_w_nmos_ * NTn1 * 2, min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak = cmos_Isub_leakage( + g_tp.min_w_nmos_ * NTn2 * R, min_w_pmos * PTn2 * R, 2, nor); double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi, min_w_pmos * PTi, 1, inv); - double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2, - min_w_pmos * PTn1 * 2, 2, nor); - double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R, - min_w_pmos * PTn2 * R, 2, nor); + double nor1_leak_gate = cmos_Ig_leakage( + g_tp.min_w_nmos_ * NTn1 * 2, min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak_gate = cmos_Ig_leakage( + g_tp.min_w_nmos_ * NTn2 * R, min_w_pmos * PTn2 * R, 2, nor); double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi, min_w_pmos * PTi, 1, inv); power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * diff --git a/src/cacti/arbiter.h b/src/cacti/arbiter.h index 0a0acc9..26a5693 100644 --- a/src/cacti/arbiter.h +++ b/src/cacti/arbiter.h @@ -44,7 +44,9 @@ class Arbiter : public Component { public: - Arbiter(double Req, double flit_sz, double output_len, + Arbiter(double Req, + double flit_sz, + double output_len, TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Arbiter(); diff --git a/src/cacti/bank.cc b/src/cacti/bank.cc index 7957649..fae2a9b 100644 --- a/src/cacti/bank.cc +++ b/src/cacti/bank.cc @@ -74,18 +74,39 @@ Bank::Bank(const DynamicParameter &dyn_p) dataoutbits *= g_ip->data_assoc; } - htree_in_add = - new Htree2(g_ip->wt, (double)mat.area.w, 
(double)mat.area.h, - total_addrbits, datainbits, 0, dataoutbits, 0, - num_mats_ver_dir * 2, num_mats_hor_dir * 2, Add_htree); - htree_in_data = - new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0, dataoutbits, 0, - num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_in_htree); - htree_out_data = - new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0, dataoutbits, 0, - num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_out_htree); + htree_in_add = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + 0, + dataoutbits, + 0, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Add_htree); + htree_in_data = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + 0, + dataoutbits, + 0, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_in_htree); + htree_out_data = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + 0, + dataoutbits, + 0, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_out_htree); // htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, // total_addrbits, datainbits, 0,dataoutbits,0, @@ -94,26 +115,64 @@ Bank::Bank(const DynamicParameter &dyn_p) area.w = htree_in_data->area.w; area.h = htree_in_data->area.h; } else { - htree_in_add = new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, searchinbits, - dataoutbits, searchoutbits, num_mats_ver_dir * 2, - num_mats_hor_dir * 2, Add_htree); - htree_in_data = new Htree2(g_ip->wt, (double)mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, searchinbits, - dataoutbits, searchoutbits, num_mats_ver_dir * 2, - num_mats_hor_dir * 2, Data_in_htree); - htree_out_data = new Htree2( - g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, - datainbits, searchinbits, dataoutbits, searchoutbits, - num_mats_ver_dir * 2, 
num_mats_hor_dir * 2, Data_out_htree); - htree_in_search = new Htree2( - g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, - datainbits, searchinbits, dataoutbits, searchoutbits, - num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_in_htree, true, true); - htree_out_search = new Htree2( - g_ip->wt, (double)mat.area.w, (double)mat.area.h, total_addrbits, - datainbits, searchinbits, dataoutbits, searchoutbits, - num_mats_ver_dir * 2, num_mats_hor_dir * 2, Data_out_htree, true); + htree_in_add = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + searchinbits, + dataoutbits, + searchoutbits, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Add_htree); + htree_in_data = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + searchinbits, + dataoutbits, + searchoutbits, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_in_htree); + htree_out_data = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + searchinbits, + dataoutbits, + searchoutbits, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_out_htree); + htree_in_search = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + searchinbits, + dataoutbits, + searchoutbits, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_in_htree, + true, + true); + htree_out_search = new Htree2(g_ip->wt, + (double)mat.area.w, + (double)mat.area.h, + total_addrbits, + datainbits, + searchinbits, + dataoutbits, + searchoutbits, + num_mats_ver_dir * 2, + num_mats_hor_dir * 2, + Data_out_htree, + true); area.w = htree_in_data->area.w; area.h = htree_in_data->area.h; diff --git a/src/cacti/basic_circuit.cc b/src/cacti/basic_circuit.cc index d1fb9e5..ac6f8a9 100644 --- a/src/cacti/basic_circuit.cc +++ b/src/cacti/basic_circuit.cc @@ -81,8 +81,12 @@ double logtwo(double x) { 
/*----------------------------------------------------------------------*/ -double gate_C(double width, double wirelength, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double gate_C(double width, + double wirelength, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { const TechnologyParameter::DeviceType *dt; if (_is_dram && _is_cell) { @@ -106,7 +110,10 @@ double gate_C(double width, double wirelength, bool _is_dram, bool _is_cell, double gate_C_pass(double width, // gate width in um (length is Lphy_periph_global) double wirelength, // poly wire length going to gate in lambda - bool _is_dram, bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) { + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { // v5.0 const TechnologyParameter::DeviceType *dt; @@ -126,10 +133,15 @@ gate_C_pass(double width, // gate width in um (length is Lphy_periph_global) dt->l_phy * Cpolywire; } -double drain_C_(double width, int nchannel, int stack, +double drain_C_(double width, + int nchannel, + int stack, int next_arg_thresh_folding_width_or_height_cell, - double fold_dimension, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { + double fold_dimension, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { double w_folded_tr; const TechnologyParameter::DeviceType *dt; @@ -206,8 +218,13 @@ double drain_C_(double width, int nchannel, int stack, drain_C_metal_connecting_folded_tr); } -double tr_R_on(double width, int nchannel, int stack, bool _is_dram, - bool _is_cell, bool _is_wl_tr, bool _is_sleep_tx) { +double tr_R_on(double width, + int nchannel, + int stack, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { const TechnologyParameter::DeviceType *dt; if ((_is_dram) && (_is_cell)) { @@ -231,8 +248,12 @@ double tr_R_on(double width, int nchannel, int stack, bool _is_dram, * data wordline to estimate the wordline driver size. 
*/ // returns width in um -double R_to_w(double res, int nchannel, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double R_to_w(double res, + int nchannel, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { const TechnologyParameter::DeviceType *dt; if ((_is_dram) && (_is_cell)) { @@ -289,8 +310,12 @@ double horowitz(double inputramptime, // input rise time return (td); } -double cmos_Ileak(double nWidth, double pWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double cmos_Ileak(double nWidth, + double pWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor @@ -318,8 +343,11 @@ int combination(int n, int m) { return ret; } -double simplified_nmos_Isat(double nwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double simplified_nmos_Isat(double nwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor @@ -334,8 +362,11 @@ double simplified_nmos_Isat(double nwidth, bool _is_dram, bool _is_cell, return nwidth * dt->I_on_n; } -double simplified_pmos_Isat(double pwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double simplified_pmos_Isat(double pwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor @@ -350,8 +381,11 @@ double simplified_pmos_Isat(double pwidth, bool _is_dram, bool _is_cell, return pwidth * dt->I_on_n / dt->n_to_p_eff_curr_drv_ratio; } -double simplified_nmos_leakage(double nwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double simplified_nmos_leakage(double nwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + 
bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor @@ -366,8 +400,11 @@ double simplified_nmos_leakage(double nwidth, bool _is_dram, bool _is_cell, return nwidth * dt->I_off_n; } -double simplified_pmos_leakage(double pwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx) { +double simplified_pmos_leakage(double pwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; if ((!_is_dram) && (_is_cell)) { // SRAM cell access transistor @@ -382,7 +419,10 @@ double simplified_pmos_leakage(double pwidth, bool _is_dram, bool _is_cell, return pwidth * dt->I_off_p; } -double cmos_Ig_n(double nWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, +double cmos_Ig_n(double nWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; @@ -398,7 +438,10 @@ double cmos_Ig_n(double nWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, return nWidth * dt->I_g_on_n; } -double cmos_Ig_p(double pWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, +double cmos_Ig_p(double pWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, bool _is_sleep_tx) { TechnologyParameter::DeviceType *dt; @@ -414,15 +457,20 @@ double cmos_Ig_p(double pWidth, bool _is_dram, bool _is_cell, bool _is_wl_tr, return pWidth * dt->I_g_on_p; } -double cmos_Isub_leakage(double nWidth, double pWidth, int fanin, - enum Gate_type g_type, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx, +double cmos_Isub_leakage(double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx, enum Half_net_topology topo) { assert(fanin >= 1); - double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, - _is_wl_tr, _is_sleep_tx); - double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, - _is_wl_tr, 
_is_sleep_tx); + double nmos_leak = simplified_nmos_leakage( + nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double pmos_leak = simplified_pmos_leakage( + pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); double Isub = 0; int num_states; int num_off_tx; @@ -522,9 +570,14 @@ double cmos_Isub_leakage(double nWidth, double pWidth, int fanin, return Isub; } -double cmos_Ig_leakage(double nWidth, double pWidth, int fanin, - enum Gate_type g_type, bool _is_dram, bool _is_cell, - bool _is_wl_tr, bool _is_sleep_tx, +double cmos_Ig_leakage(double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx, enum Half_net_topology topo) { assert(fanin >= 1); double nmos_leak = @@ -637,10 +690,17 @@ double cmos_Ig_leakage(double nWidth, double pWidth, int fanin, return Ig_on; } -double shortcircuit_simple(double vt, double velocity_index, double c_in, - double c_out, double w_nmos, double w_pmos, - double i_on_n, double i_on_p, double i_on_n_in, - double i_on_p_in, double vdd) { +double shortcircuit_simple(double vt, + double velocity_index, + double c_in, + double c_out, + double w_nmos, + double w_pmos, + double i_on_n, + double i_on_p, + double i_on_n_in, + double i_on_p_in, + double vdd) { double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, @@ -696,9 +756,17 @@ double shortcircuit_simple(double vt, double velocity_index, double c_in, return (p_short_circuit); } -double shortcircuit(double vt, double velocity_index, double c_in, double c_out, - double w_nmos, double w_pmos, double i_on_n, double i_on_p, - double i_on_n_in, double i_on_p_in, double vdd) { +double shortcircuit(double vt, + double velocity_index, + double c_in, + double c_out, + double w_nmos, + double w_pmos, + double i_on_n, + double i_on_p, + double i_on_n_in, + double i_on_p_in, + double vdd) { double p_short_circuit = 0, 
p_short_circuit_discharge; //, p_short_circuit_charge, diff --git a/src/cacti/basic_circuit.h b/src/cacti/basic_circuit.h index 8d77440..8e3a516 100644 --- a/src/cacti/basic_circuit.h +++ b/src/cacti/basic_circuit.h @@ -71,92 +71,150 @@ enum Half_net_topology { parallel, series }; double logtwo(double x); -double gate_C(double width, double wirelength, bool _is_dram = false, - bool _is_sram = false, bool _is_wl_tr = false, +double gate_C(double width, + double wirelength, + bool _is_dram = false, + bool _is_sram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double gate_C_pass(double width, double wirelength, bool _is_dram = false, - bool _is_sram = false, bool _is_wl_tr = false, +double gate_C_pass(double width, + double wirelength, + bool _is_dram = false, + bool _is_sram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double drain_C_(double width, int nchannel, int stack, +double drain_C_(double width, + int nchannel, + int stack, int next_arg_thresh_folding_width_or_height_cell, - double fold_dimension, bool _is_dram = false, - bool _is_sram = false, bool _is_wl_tr = false, + double fold_dimension, + bool _is_dram = false, + bool _is_sram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double tr_R_on(double width, int nchannel, int stack, bool _is_dram = false, - bool _is_sram = false, bool _is_wl_tr = false, +double tr_R_on(double width, + int nchannel, + int stack, + bool _is_dram = false, + bool _is_sram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double R_to_w(double res, int nchannel, bool _is_dram = false, - bool _is_sram = false, bool _is_wl_tr = false, +double R_to_w(double res, + int nchannel, + bool _is_dram = false, + bool _is_sram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double horowitz(double inputramptime, double tf, double vs1, double vs2, - int rise); +double +horowitz(double inputramptime, double tf, double vs1, double vs2, int rise); -double 
pmos_to_nmos_sz_ratio(bool _is_dram = false, bool _is_wl_tr = false, +double pmos_to_nmos_sz_ratio(bool _is_dram = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double simplified_nmos_leakage(double nwidth, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double simplified_nmos_leakage(double nwidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double simplified_pmos_leakage(double pwidth, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double simplified_pmos_leakage(double pwidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double simplified_nmos_Isat(double nwidth, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double simplified_nmos_Isat(double nwidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double simplified_pmos_Isat(double pwidth, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double simplified_pmos_Isat(double pwidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double cmos_Ileak(double nWidth, double pWidth, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double cmos_Ileak(double nWidth, + double pWidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false); -double cmos_Ig_n(double nWidth, bool _is_dram = false, bool _is_cell = false, - bool _is_wl_tr = false, bool _is_sleep_tx = false); - -double cmos_Ig_p(double pWidth, bool _is_dram = false, bool _is_cell = false, - bool _is_wl_tr = false, bool _is_sleep_tx = false); - -double cmos_Isub_leakage(double nWidth, double pWidth, int fanin, - enum Gate_type g_type, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double cmos_Ig_n(double nWidth, + bool 
_is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, + bool _is_sleep_tx = false); + +double cmos_Ig_p(double pWidth, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, + bool _is_sleep_tx = false); + +double cmos_Isub_leakage(double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false, enum Half_net_topology topo = series); -double cmos_Ig_leakage(double nWidth, double pWidth, int fanin, - enum Gate_type g_type, bool _is_dram = false, - bool _is_cell = false, bool _is_wl_tr = false, +double cmos_Ig_leakage(double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram = false, + bool _is_cell = false, + bool _is_wl_tr = false, bool _is_sleep_tx = false, enum Half_net_topology topo = series); -double shortcircuit(double vt, double velocity_index, double c_in, double c_out, - double w_nmos, double w_pmos, double i_on_n, double i_on_p, - double i_on_n_in, double i_on_p_in, double vdd); - -double shortcircuit_simple(double vt, double velocity_index, double c_in, - double c_out, double w_nmos, double w_pmos, - double i_on_n, double i_on_p, double i_on_n_in, - double i_on_p_in, double vdd); +double shortcircuit(double vt, + double velocity_index, + double c_in, + double c_out, + double w_nmos, + double w_pmos, + double i_on_n, + double i_on_p, + double i_on_n_in, + double i_on_p_in, + double vdd); + +double shortcircuit_simple(double vt, + double velocity_index, + double c_in, + double c_out, + double w_nmos, + double w_pmos, + double i_on_n, + double i_on_p, + double i_on_n_in, + double i_on_p_in, + double vdd); // set power point product mask; strictly speaking this is not real point // product -inline void set_pppm(double *pppv, double a = 1, double b = 1, double c = 1, - double d = 1) { +inline void +set_pppm(double *pppv, double a = 1, double b = 1, double c = 1, double d = 1) { pppv[0] = a; 
pppv[1] = b; pppv[2] = c; pppv[3] = d; } -inline void set_sppm(double *sppv, double a = 1, double b = 1, double c = 1, - double d = 1) { +inline void +set_sppm(double *sppv, double a = 1, double b = 1, double c = 1, double d = 1) { sppv[0] = a; sppv[1] = b; sppv[2] = c; diff --git a/src/cacti/cacti_interface.h b/src/cacti/cacti_interface.h index 0a94e7f..b1d1664 100644 --- a/src/cacti/cacti_interface.h +++ b/src/cacti/cacti_interface.h @@ -428,13 +428,31 @@ uca_org_t cacti_interface(InputParameter *const local_interface); uca_org_t init_interface(InputParameter *const local_interface); // McPAT's plain interface, please keep !!! uca_org_t cacti_interface( - int cache_size, int line_size, int associativity, int rw_ports, - int excl_read_ports, int excl_write_ports, int single_ended_read_ports, - int search_ports, int banks, double tech_node, int output_width, - int specific_tag, int tag_width, int access_mode, int cache, int main_mem, - int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power, - int obj_func_cycle_time, int obj_func_area, int dev_func_delay, - int dev_func_dynamic_power, int dev_func_leakage_power, int dev_func_area, + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node, + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, @@ -442,14 +460,27 @@ uca_org_t cacti_interface( // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, int data_arr_peri_global_tech_flavor_in, - int 
tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, - int interconnect_projection_type_in, int wire_inside_mat_type_in, - int wire_outside_mat_type_in, int REPEATERS_IN_HTREE_SEGMENTS_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, - int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, int PAGE_SIZE_BITS_in, - int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in, int force_wiretype, - int wiretype, int force_config, int ndwl, int ndbl, int nspd, int ndcm, - int ndsam1, int ndsam2, int ecc); + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in, + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config, + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1, + int ndsam2, + int ecc); // int cache_size, // int line_size, // int associativity, @@ -495,33 +526,58 @@ uca_org_t cacti_interface( // Naveen's interface uca_org_t cacti_interface( - int cache_size, int line_size, int associativity, int rw_ports, - int excl_read_ports, int excl_write_ports, int single_ended_read_ports, - int banks, double tech_node, int page_sz, int burst_length, int pre_width, - int output_width, int specific_tag, int tag_width, + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int banks, + double tech_node, + int page_sz, + int burst_length, + int pre_width, + int output_width, + int specific_tag, + int tag_width, int access_mode, // 0 normal, 1 seq, 2 fast int cache, // scratch ram or cache - int main_mem, int obj_func_delay, int obj_func_dynamic_power, - int obj_func_leakage_power, int obj_func_area, int obj_func_cycle_time, - int dev_func_delay, int 
dev_func_dynamic_power, int dev_func_leakage_power, - int dev_func_area, int dev_func_cycle_time, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_area, + int obj_func_cycle_time, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, int interconnect_projection_type_in, // 0 - aggressive, 1 - normal - int wire_inside_mat_type_in, int wire_outside_mat_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, int is_nuca, // 0 - UCA, 1 - NUCA int core_count, int cache_level, // 0 - L2, 1 - L3 - int nuca_bank_count, int nuca_obj_func_delay, - int nuca_obj_func_dynamic_power, int nuca_obj_func_leakage_power, - int nuca_obj_func_area, int nuca_obj_func_cycle_time, - int nuca_dev_func_delay, int nuca_dev_func_dynamic_power, - int nuca_dev_func_leakage_power, int nuca_dev_func_area, + int nuca_bank_count, + int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, + int nuca_obj_func_leakage_power, + int nuca_obj_func_area, + int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, + int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, + int nuca_dev_func_area, int nuca_dev_func_cycle_time, int REPEATERS_IN_HTREE_SEGMENTS_in, // TODO for now only wires with // repeaters are supported diff --git a/src/cacti/component.cc b/src/cacti/component.cc index 7741ffd..2d58896 100644 --- a/src/cacti/component.cc +++ b/src/cacti/component.cc @@ -63,8 +63,10 @@ double 
Component::compute_diffusion_width(int num_stacked_in, return total_diff_w; } -double Component::compute_gate_area(int gate_type, int num_inputs, - double w_pmos, double w_nmos, +double Component::compute_gate_area(int gate_type, + int num_inputs, + double w_pmos, + double w_nmos, double h_gate) { if (w_pmos <= 0.0 || w_nmos <= 0.0) { return 0.0; @@ -160,10 +162,16 @@ double Component::height_sense_amplifier(double pitch_sense_amp) { return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; } -int Component::logical_effort(int num_gates_min, double g, double F, - double *w_n, double *w_p, double C_load, - double p_to_n_sz_ratio, bool is_dram_, - bool is_wl_tr_, double max_w_nmos) { +int Component::logical_effort(int num_gates_min, + double g, + double F, + double *w_n, + double *w_p, + double C_load, + double p_to_n_sz_ratio, + bool is_dram_, + bool is_wl_tr_, + double max_w_nmos) { int num_gates = (int)(log(F) / log(fopt)); // check if num_gates is odd. if so, add 1 to make it even @@ -180,8 +188,8 @@ int Component::logical_effort(int num_gates_min, double g, double F, w_p[i] = p_to_n_sz_ratio * w_n[i]; if (w_n[i] > max_w_nmos) { - double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, - is_wl_tr_); + double C_ld = gate_C( + (1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); num_gates = (int)(log(F) / log(fopt)) + 1; num_gates += (num_gates % 2) ? 
1 : 0; diff --git a/src/cacti/component.h b/src/cacti/component.h index 44af1bd..636a432 100644 --- a/src/cacti/component.h +++ b/src/cacti/component.h @@ -50,17 +50,27 @@ class Component { double delay; double cycle_time; - double compute_gate_area(int gate_type, int num_inputs, double w_pmos, - double w_nmos, double h_gate); + double compute_gate_area(int gate_type, + int num_inputs, + double w_pmos, + double w_nmos, + double h_gate); double compute_tr_width_after_folding(double input_width, double threshold_folding_width); double height_sense_amplifier(double pitch_sense_amp); protected: - int logical_effort(int num_gates_min, double g, double F, double *w_n, - double *w_p, double C_load, double p_to_n_sz_ratio, - bool is_dram_, bool is_wl_tr_, double max_w_nmos); + int logical_effort(int num_gates_min, + double g, + double F, + double *w_n, + double *w_p, + double C_load, + double p_to_n_sz_ratio, + bool is_dram_, + bool is_wl_tr_, + double max_w_nmos); private: double compute_diffusion_width(int num_stacked_in, int num_folded_tr); diff --git a/src/cacti/crossbar.cc b/src/cacti/crossbar.cc index 744a82d..9a42c62 100644 --- a/src/cacti/crossbar.cc +++ b/src/cacti/crossbar.cc @@ -34,7 +34,9 @@ #define ASPECT_THRESHOLD .8 #define ADJ 1 -Crossbar::Crossbar(double n_inp_, double n_out_, double flit_size_, +Crossbar::Crossbar(double n_inp_, + double n_out_, + double flit_size_, TechnologyParameter::DeviceType *dt) : n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; @@ -94,13 +96,19 @@ void Crossbar::compute_power() { double tri_cap = output_buffer(); assert(tri_cap > 0); // area of a tristate logic - double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, - TriS2 * min_w_pmos, g_tp.cell_h_def); + double g_area = compute_gate_area( + INV, 1, TriS2 * g_tp.min_w_nmos_, TriS2 * min_w_pmos, g_tp.cell_h_def); g_area *= 2; // to model area of output transistors - g_area += 
compute_gate_area(NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_, - TriS1 * min_w_pmos, g_tp.cell_h_def); - g_area += compute_gate_area(NOR, 2, TriS1 * g_tp.min_w_nmos_, - TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area(NAND, + 2, + TriS1 * 2 * g_tp.min_w_nmos_, + TriS1 * min_w_pmos, + g_tp.cell_h_def); + g_area += compute_gate_area(NOR, + 2, + TriS1 * g_tp.min_w_nmos_, + TriS1 * 2 * min_w_pmos, + g_tp.cell_h_def); double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); // effective no. of tristate buffers that need to be laid side by side int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); @@ -131,27 +139,28 @@ void Crossbar::compute_power() { (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd * Vdd) * flit_size; - power.readOp.leakage = n_inp * n_out * flit_size * - (cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, - min_w_pmos * TriS2 * 2, 1, inv) * - Vdd + - cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, - min_w_pmos * TriS1 * 3, 2, nand) * - Vdd + - cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, - min_w_pmos * TriS1 * 3, 2, nor) * - Vdd + - w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.leakage = + n_inp * n_out * flit_size * + (cmos_Isub_leakage( + g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 1, inv) * + Vdd + + cmos_Isub_leakage( + g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, nand) * + Vdd + + cmos_Isub_leakage( + g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, nor) * + Vdd + + w1.power.readOp.leakage + w2.power.readOp.leakage); power.readOp.gate_leakage = n_inp * n_out * flit_size * - (cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 1, - inv) * + (cmos_Ig_leakage( + g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, 1, inv) * Vdd + - cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, - nand) * + cmos_Ig_leakage( + g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, nand) * Vdd + - 
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, - nor) * + cmos_Ig_leakage( + g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, 2, nor) * Vdd + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); @@ -162,9 +171,11 @@ void Crossbar::compute_power() { tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out * tri_inp_cap + n_inp * tri_out_cap; - delay = horowitz(w1.signal_rise_time(), res * cap, + delay = horowitz(w1.signal_rise_time(), + res * cap, + deviceType->Vth / deviceType->Vdd, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE); + RISE); Wire wreset(1, 1); } diff --git a/src/cacti/crossbar.h b/src/cacti/crossbar.h index 5bf9e7a..b50a548 100644 --- a/src/cacti/crossbar.h +++ b/src/cacti/crossbar.h @@ -44,7 +44,9 @@ class Crossbar : public Component { public: - Crossbar(double in, double out, double flit_sz, + Crossbar(double in, + double out, + double flit_sz, TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Crossbar(); diff --git a/src/cacti/decoder.cc b/src/cacti/decoder.cc index 7573058..80957c1 100644 --- a/src/cacti/decoder.cc +++ b/src/cacti/decoder.cc @@ -40,10 +40,16 @@ using namespace std; -Decoder::Decoder(int _num_dec_signals, bool flag_way_select, - double _C_ld_dec_out, double _R_wire_dec_out, - bool fully_assoc_, bool is_dram_, bool is_wl_tr_, - const Area &cell_, bool power_gating_, int nodes_DSTN_) +Decoder::Decoder(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_, + int nodes_DSTN_) : exist(false), C_ld_dec_out(_C_ld_dec_out), R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), // power(), @@ -109,10 +115,16 @@ void Decoder::compute_widths() { F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + 
gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); - num_gates = - logical_effort(num_gates_min, num_in_signals == 2 ? gnand2 : gnand3, F, - w_dec_n, w_dec_p, C_ld_dec_out, p_to_n_sz_ratio, is_dram, - is_wl_tr, g_tp.max_w_nmos_dec); + num_gates = logical_effort(num_gates_min, + num_in_signals == 2 ? gnand2 : gnand3, + F, + w_dec_n, + w_dec_p, + C_ld_dec_out, + p_to_n_sz_ratio, + is_dram, + is_wl_tr, + g_tp.max_w_nmos_dec); } } @@ -178,8 +190,13 @@ void Decoder::compute_power_gating() { c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, cell.h); // Psleep tx detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; // if (g_ip->power_gating) - sleeptx = new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, - detalV, nodes_DSTN, area); + sleeptx = new Sleep_tx(g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + nodes_DSTN, + area); } double Decoder::compute_delays(double inrisetime) { @@ -203,7 +220,13 @@ double Decoder::compute_delays(double inrisetime) { c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + - drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, + drain_C_(w_dec_n[0], + NCH, + num_in_signals, + 1, + area.h, + is_dram, + false, is_wl_tr); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); @@ -215,8 +238,8 @@ double Decoder::compute_delays(double inrisetime) { for (i = 1; i < num_gates - 1; ++i) { rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_p[i + 1] + w_dec_n[i + 1], 0.0, is_dram, false, - is_wl_tr); + c_load = gate_C( + w_dec_p[i + 1] + w_dec_n[i + 1], 0.0, is_dram, false, is_wl_tr); c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); @@ -282,10 +305,13 @@ void Decoder::leakage_feedback(double temperature) { } } -PredecBlk::PredecBlk(int num_dec_signals, Decoder *dec_, 
+PredecBlk::PredecBlk(int num_dec_signals, + Decoder *dec_, double C_wire_predec_blk_out, - double R_wire_predec_blk_out_, int num_dec_per_predec, - bool is_dram, bool is_blk1) + double R_wire_predec_blk_out_, + int num_dec_per_predec, + bool is_dram, + bool is_blk1) : dec(dec_), exist(false), number_input_addr_bits(0), C_ld_predec_blk_out(0), R_wire_predec_blk_out(0), branch_effort_nand2_gate_output(1), branch_effort_nand3_gate_output(1), @@ -425,10 +451,16 @@ void PredecBlk::compute_widths() { w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - number_gates_L2 = - logical_effort(min_number_gates_L2, flag_L2_gate == 2 ? gnand2 : gnand3, - F, w_L2_n, w_L2_p, C_ld_predec_blk_out, p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); + number_gates_L2 = logical_effort(min_number_gates_L2, + flag_L2_gate == 2 ? gnand2 : gnand3, + F, + w_L2_n, + w_L2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); // Now find the number of gates and widths in first level of predecoder if ((flag_two_unique_paths) || @@ -445,10 +477,16 @@ void PredecBlk::compute_widths() { F = gnand2 * c_load_nand2_path / (gate_C(w_L1_nand2_n[0], 0, is_dram_) + gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = - logical_effort(min_number_gates_L1, gnand2, F, w_L1_nand2_n, - w_L1_nand2_p, c_load_nand2_path, p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); + number_gates_L1_nand2_path = logical_effort(min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + c_load_nand2_path, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } // Now find widths of gates along path in which first gate is a NAND3 @@ -466,10 +504,16 @@ void PredecBlk::compute_widths() { F = gnand3 * c_load_nand3_path / (gate_C(w_L1_nand3_n[0], 0, is_dram_) + gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = - 
logical_effort(min_number_gates_L1, gnand3, F, w_L1_nand3_n, - w_L1_nand3_p, c_load_nand3_path, p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); + number_gates_L1_nand3_path = logical_effort(min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + c_load_nand3_path, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } } else { // find number of gates and widths in first level of predecoder block // when there is no second level @@ -479,20 +523,32 @@ void PredecBlk::compute_widths() { F = gnand2 * C_ld_predec_blk_out / (gate_C(w_L1_nand2_n[0], 0, is_dram_) + gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = - logical_effort(min_number_gates_L1, gnand2, F, w_L1_nand2_n, - w_L1_nand2_p, C_ld_predec_blk_out, p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); + number_gates_L1_nand2_path = logical_effort(min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } else if (number_inputs_L1_gate == 3) { w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand3 * C_ld_predec_blk_out / (gate_C(w_L1_nand3_n[0], 0, is_dram_) + gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = - logical_effort(min_number_gates_L1, gnand3, F, w_L1_nand3_n, - w_L1_nand3_p, C_ld_predec_blk_out, p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); + number_gates_L1_nand3_path = logical_effort(min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } } } @@ -517,8 +573,8 @@ void PredecBlk::compute_area() { leak_L1_nand3 = 0; gate_leak_L1_nand3 = 0; } else { - tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], - w_L1_nand3_n[0], g_tp.cell_h_def); + tot_area_L1_nand3 = compute_gate_area( + NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); leak_L1_nand3 = 
cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); gate_leak_L1_nand3 = @@ -588,10 +644,10 @@ void PredecBlk::compute_area() { } for (int i = 1; i < number_gates_L1_nand2_path; ++i) { - tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], - w_L1_nand2_n[i], g_tp.cell_h_def); - leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, - nand, is_dram_); + tot_area_L1_nand2 += compute_gate_area( + INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); + leak_L1_nand2 += cmos_Isub_leakage( + w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); } @@ -600,10 +656,10 @@ void PredecBlk::compute_area() { gate_leak_L1_nand2 *= num_L1_nand2; for (int i = 1; i < number_gates_L1_nand3_path; ++i) { - tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], - w_L1_nand3_n[i], g_tp.cell_h_def); - leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, - nand, is_dram_); + tot_area_L1_nand3 += compute_gate_area( + INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); + leak_L1_nand3 += cmos_Isub_leakage( + w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); } @@ -940,8 +996,8 @@ void PredecBlk::leakage_feedback(double temperature) { } for (int i = 1; i < number_gates_L1_nand2_path; ++i) { - leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, - nand, is_dram_); + leak_L1_nand2 += cmos_Isub_leakage( + w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); } @@ -949,8 +1005,8 @@ void PredecBlk::leakage_feedback(double temperature) { gate_leak_L1_nand2 *= num_L1_nand2; for (int i = 1; i < number_gates_L1_nand3_path; ++i) { - leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, - nand, is_dram_); + 
leak_L1_nand3 += cmos_Isub_leakage( + w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); } @@ -1103,10 +1159,16 @@ void PredecBlkDrv::compute_widths() { width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); - number_gates_nand2_path = - logical_effort(min_number_gates, 1, F, width_nand2_path_n, - width_nand2_path_p, c_load_nand2_path_out, - p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); + number_gates_nand2_path = logical_effort(min_number_gates, + 1, + F, + width_nand2_path_n, + width_nand2_path_p, + c_load_nand2_path_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } if ((blk->flag_two_unique_paths) || (blk->number_inputs_L1_gate == 3) || @@ -1117,10 +1179,16 @@ void PredecBlkDrv::compute_widths() { width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); - number_gates_nand3_path = - logical_effort(min_number_gates, 1, F, width_nand3_path_n, - width_nand3_path_p, c_load_nand3_path_out, - p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); + number_gates_nand3_path = logical_effort(min_number_gates, + 1, + F, + width_nand3_path_n, + width_nand3_path_p, + c_load_nand3_path_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } } } @@ -1136,9 +1204,11 @@ void PredecBlkDrv::compute_area() { if (flag_driver_exists) { // first check whether a predecoder block driver is // needed for (int i = 0; i < number_gates_nand2_path; ++i) { - area_nand2_path += - compute_gate_area(INV, 1, width_nand2_path_p[i], - width_nand2_path_n[i], g_tp.cell_h_def); + area_nand2_path += compute_gate_area(INV, + 1, + width_nand2_path_p[i], + width_nand2_path_n[i], + g_tp.cell_h_def); leak_nand2_path += cmos_Isub_leakage( width_nand2_path_n[i], 
width_nand2_path_p[i], 1, inv, is_dram_); gate_leak_nand2_path += cmos_Ig_leakage( @@ -1155,9 +1225,11 @@ void PredecBlkDrv::compute_area() { num_buffers_driving_4_nand2_load); for (int i = 0; i < number_gates_nand3_path; ++i) { - area_nand3_path += - compute_gate_area(INV, 1, width_nand3_path_p[i], - width_nand3_path_n[i], g_tp.cell_h_def); + area_nand3_path += compute_gate_area(INV, + 1, + width_nand3_path_p[i], + width_nand3_path_n[i], + g_tp.cell_h_def); leak_nand3_path += cmos_Isub_leakage( width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); gate_leak_nand3_path += cmos_Ig_leakage( @@ -1200,8 +1272,8 @@ PredecBlkDrv::compute_delays(double inrisetime_nand2_path, c_gate_load = gate_C( width_nand2_path_p[i + 1] + width_nand2_path_n[i + 1], 0.0, is_dram_); c_intrinsic = - drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, - is_dram_) + + drain_C_( + width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_gate_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); @@ -1216,8 +1288,8 @@ PredecBlkDrv::compute_delays(double inrisetime_nand2_path, i = number_gates_nand2_path - 1; rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); c_intrinsic = - drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, - is_dram_) + + drain_C_( + width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); c_load = c_load_nand2_path_out; tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2; @@ -1235,8 +1307,8 @@ PredecBlkDrv::compute_delays(double inrisetime_nand2_path, c_gate_load = gate_C( width_nand3_path_p[i + 1] + width_nand3_path_n[i + 1], 0.0, is_dram_); c_intrinsic = - drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, - is_dram_) + + drain_C_( + width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + 
drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_gate_load); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); @@ -1251,8 +1323,8 @@ PredecBlkDrv::compute_delays(double inrisetime_nand2_path, i = number_gates_nand3_path - 1; rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_); c_intrinsic = - drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, - is_dram_) + + drain_C_( + width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); c_load = c_load_nand3_path_out; tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2; @@ -1452,8 +1524,12 @@ Predec::get_max_delay_before_decoder(pair input_pair1, return ret_val; } -Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, - bool is_dram, bool power_gating_, int nodes_DSTN_) +Driver::Driver(double c_gate_load_, + double c_wire_load_, + double r_wire_load_, + bool is_dram, + bool power_gating_, + int nodes_DSTN_) : number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), // power(), @@ -1475,9 +1551,16 @@ void Driver::compute_widths() { width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); - number_gates = - logical_effort(min_number_gates, 1, F, width_n, width_p, c_load, - p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); + number_gates = logical_effort(min_number_gates, + 1, + F, + width_n, + width_p, + c_load, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); } void Driver::compute_area() { @@ -1513,10 +1596,13 @@ void Driver::compute_power_gating() { c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h); // Psleep tx detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; // if (g_ip->power_gating) - sleeptx = - new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, detalV, 
- nodes_DSTN, // default is 1 for drivers - area); + sleeptx = new Sleep_tx(g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + nodes_DSTN, // default is 1 for drivers + area); } double Driver::compute_delay(double inrisetime) { diff --git a/src/cacti/decoder.h b/src/cacti/decoder.h index b2e525e..80cc86c 100644 --- a/src/cacti/decoder.h +++ b/src/cacti/decoder.h @@ -43,9 +43,15 @@ using namespace std; class Decoder : public Component { public: - Decoder(int _num_dec_signals, bool flag_way_select, double _C_ld_dec_out, - double _R_wire_dec_out, bool fully_assoc_, bool is_dram_, - bool is_wl_tr_, const Area &cell_, bool power_gating_ = false, + Decoder(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_ = false, int nodes_DSTN_ = 1); bool exist; @@ -85,8 +91,12 @@ class Decoder : public Component { class PredecBlk : public Component { public: - PredecBlk(int num_dec_signals, Decoder *dec, double C_wire_predec_blk_out, - double R_wire_predec_blk_out, int num_dec_per_predec, bool is_dram_, + PredecBlk(int num_dec_signals, + Decoder *dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, bool is_blk1); Decoder *dec; @@ -206,8 +216,12 @@ class Predec : public Component { class Driver : public Component { public: - Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, - bool is_dram, bool power_gating_ = false, int nodes_DSTN_ = 1); + Driver(double c_gate_load_, + double c_wire_load_, + double r_wire_load_, + bool is_dram, + bool power_gating_ = false, + int nodes_DSTN_ = 1); int number_gates; int min_number_gates; diff --git a/src/cacti/htree2.cc b/src/cacti/htree2.cc index aa2c8e8..b6d3074 100644 --- a/src/cacti/htree2.cc +++ b/src/cacti/htree2.cc @@ -36,10 +36,19 @@ #include #include -Htree2::Htree2(enum Wire_type wire_model, double mat_w, 
double mat_h, - int a_bits, int d_inbits, int search_data_in, int d_outbits, - int search_data_out, int bl, int wl, enum Htree_type htree_type, - bool uca_tree_, bool search_tree_, +Htree2::Htree2(enum Wire_type wire_model, + double mat_w, + double mat_h, + int a_bits, + int d_inbits, + int search_data_in, + int d_outbits, + int search_data_out, + int bl, + int wl, + enum Htree_type htree_type, + bool uca_tree_, + bool search_tree_, TechnologyParameter::DeviceType *dt) : in_rise_time(0), out_rise_time(0), tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), add_bits(a_bits), @@ -110,34 +119,40 @@ void Htree2::input_nand(double s1, double s2, double l_eff) { double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) * (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)); - delay += horowitz(w1.out_rise_time, tc, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE); + delay += horowitz(w1.out_rise_time, + tc, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + RISE); power.readOp.dynamic += 0.5 * - (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, - g_tp.cell_h_def) + + (2 * drain_C_( + pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, - g_tp.cell_h_def) + + (2 * drain_C_( + pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * deviceType->Vdd * deviceType->Vdd * wire_bw; - power.readOp.leakage += - (wire_bw * cmos_Isub_leakage(min_w_nmos * (nsize * 2), - min_w_pmos * nsize * 2, 2, nand)) * - deviceType->Vdd; + power.readOp.leakage += (wire_bw * cmos_Isub_leakage(min_w_nmos * (nsize * 2), + 
min_w_pmos * nsize * 2, + 2, + nand)) * + deviceType->Vdd; power.readOp.power_gated_leakage += - (wire_bw * cmos_Isub_leakage(min_w_nmos * (nsize * 2), - min_w_pmos * nsize * 2, 2, nand)) * + (wire_bw * + cmos_Isub_leakage( + min_w_nmos * (nsize * 2), min_w_pmos * nsize * 2, 2, nand)) * deviceType->Vcc_min; power.readOp.gate_leakage += - (wire_bw * cmos_Ig_leakage(min_w_nmos * (nsize * 2), - min_w_pmos * nsize * 2, 2, nand)) * + (wire_bw * + cmos_Ig_leakage( + min_w_nmos * (nsize * 2), min_w_pmos * nsize * 2, 2, nand)) * deviceType->Vdd; } @@ -168,8 +183,11 @@ void Htree2::output_buffer(double s1, double s2, double l_eff) { double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; - delay += horowitz(w1.out_rise_time, tc, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE); + delay += horowitz(w1.out_rise_time, + tc, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + RISE); // nand power.readOp.dynamic += @@ -235,32 +253,36 @@ void Htree2::output_buffer(double s1, double s2, double l_eff) { if (uca_tree) { power.readOp.leakage += - cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, - inv) * + cmos_Isub_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vdd * wire_bw; /*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, - min_w_pmos * size * 3, 2, nand) * - deviceType->Vdd * wire_bw; // nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, - min_w_pmos * size * 3, 2, nor) * - deviceType->Vdd * wire_bw; // nor + power.readOp.leakage += + cmos_Isub_leakage( + min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.leakage += + cmos_Isub_leakage( + min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * 
tr_size * 2, 1, - inv) * + cmos_Isub_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vcc_min * wire_bw; /*inverter + output tr*/ - power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, - nand) * - deviceType->Vcc_min * wire_bw; // nand - power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, - nor) * - deviceType->Vcc_min * wire_bw; // nor + power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, + 2, + nand) * + deviceType->Vcc_min * wire_bw; // nand + power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, + 2, + nor) * + deviceType->Vcc_min * wire_bw; // nor power.readOp.gate_leakage += - cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, - inv) * + cmos_Ig_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vdd * wire_bw; /*inverter + output tr*/ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * @@ -271,32 +293,36 @@ void Htree2::output_buffer(double s1, double s2, double l_eff) { // power.readOp.gate_leakage *=; } else { power.readOp.leakage += - cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, - inv) * + cmos_Isub_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vdd * wire_bw; /*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, - min_w_pmos * size * 3, 2, nand) * - deviceType->Vdd * wire_bw; // nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos * size * 3, - min_w_pmos * size * 3, 2, nor) * - deviceType->Vdd * wire_bw; // nor + power.readOp.leakage += + cmos_Isub_leakage( + min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * + deviceType->Vdd * wire_bw; // nand + power.readOp.leakage += + cmos_Isub_leakage( + 
min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nor) * + deviceType->Vdd * wire_bw; // nor power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, - inv) * + cmos_Isub_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vcc_min * wire_bw; /*inverter + output tr*/ - power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, - nand) * - deviceType->Vcc_min * wire_bw; // nand - power.readOp.power_gated_leakage += - cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, - nor) * - deviceType->Vcc_min * wire_bw; // nor + power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, + 2, + nand) * + deviceType->Vcc_min * wire_bw; // nand + power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos * size * 3, + min_w_pmos * size * 3, + 2, + nor) * + deviceType->Vcc_min * wire_bw; // nor power.readOp.gate_leakage += - cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, - inv) * + cmos_Ig_leakage( + min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, 1, inv) * deviceType->Vdd * wire_bw; /*inverter + output tr*/ power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, nand) * diff --git a/src/cacti/htree2.h b/src/cacti/htree2.h index 5023c49..dee0c48 100644 --- a/src/cacti/htree2.h +++ b/src/cacti/htree2.h @@ -45,9 +45,18 @@ class Htree2 : public Component { public: - Htree2(enum Wire_type wire_model, double mat_w, double mat_h, int add, - int data_in, int search_data_in, int data_out, int search_data_out, - int bl, int wl, enum Htree_type h_type, bool uca_tree_ = false, + Htree2(enum Wire_type wire_model, + double mat_w, + double mat_h, + int add, + int data_in, + int search_data_in, + int data_out, + int search_data_out, + int bl, + int wl, + enum Htree_type h_type, + bool uca_tree_ = false, bool search_tree_ = false, 
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Htree2(){}; diff --git a/src/cacti/io.cc b/src/cacti/io.cc index 4518f9d..fb80bf4 100644 --- a/src/cacti/io.cc +++ b/src/cacti/io.cc @@ -88,7 +88,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-internal prefetch width", line, + if (!strncmp("-internal prefetch width", + line, strlen("-internal prefetch width"))) { sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w)); continue; @@ -195,8 +196,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Powergating voltage", line, - strlen("-Powergating voltage"))) { + if (!strncmp( + "-Powergating voltage", line, strlen("-Powergating voltage"))) { sscanf(line, "-Powergating voltage%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("default", temp_var, sizeof("default"))) { specific_vcc_min = false; @@ -216,8 +217,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-operating temperature", line, - strlen("-operating temperature"))) { + if (!strncmp( + "-operating temperature", line, strlen("-operating temperature"))) { sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp)); continue; } @@ -285,8 +286,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Data array cell type", line, - strlen("-Data array cell type"))) { + if (!strncmp( + "-Data array cell type", line, strlen("-Data array cell type"))) { sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { @@ -306,10 +307,11 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Data array peripheral type", line, + if (!strncmp("-Data array peripheral type", + line, strlen("-Data array peripheral type"))) { - sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, - temp_var); + sscanf( + line, "-Data array peripheral type 
%[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { data_arr_peri_global_tech_type = 0; @@ -324,8 +326,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Tag array cell type", line, - strlen("-Tag array cell type"))) { + if (!strncmp( + "-Tag array cell type", line, strlen("-Tag array cell type"))) { sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { @@ -345,7 +347,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Tag array peripheral type", line, + if (!strncmp("-Tag array peripheral type", + line, strlen("-Tag array peripheral type"))) { sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); @@ -362,15 +365,25 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } if (!strncmp("-design", line, strlen("-design"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_wt), - &(dynamic_power_wt), &(leakage_power_wt), &(cycle_time_wt), + sscanf(line, + "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", + jk, + &(delay_wt), + &(dynamic_power_wt), + &(leakage_power_wt), + &(cycle_time_wt), &(area_wt)); continue; } if (!strncmp("-deviate", line, strlen("-deviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_dev), - &(dynamic_power_dev), &(leakage_power_dev), &(cycle_time_dev), + sscanf(line, + "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", + jk, + &(delay_dev), + &(dynamic_power_dev), + &(leakage_power_dev), + &(cycle_time_dev), &(area_dev)); continue; } @@ -388,16 +401,26 @@ void InputParameter::parse_cfg(const string &in_file) { } if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_wt_nuca), - &(dynamic_power_wt_nuca), &(leakage_power_wt_nuca), - &(cycle_time_wt_nuca), &(area_wt_nuca)); + sscanf(line, + "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", + jk, + &(delay_wt_nuca), + 
&(dynamic_power_wt_nuca), + &(leakage_power_wt_nuca), + &(cycle_time_wt_nuca), + &(area_wt_nuca)); continue; } if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, &(delay_dev_nuca), - &(dynamic_power_dev_nuca), &(leakage_power_dev_nuca), - &(cycle_time_dev_nuca), &(area_dev_nuca)); + sscanf(line, + "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", + jk, + &(delay_dev_nuca), + &(dynamic_power_dev_nuca), + &(leakage_power_dev_nuca), + &(cycle_time_dev_nuca), + &(area_dev_nuca)); continue; } @@ -447,7 +470,8 @@ void InputParameter::parse_cfg(const string &in_file) { continue; } - if (!strncmp("-Interconnect projection", line, + if (!strncmp("-Interconnect projection", + line, strlen("-Interconnect projection"))) { sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var); @@ -566,7 +590,8 @@ void InputParameter::parse_cfg(const string &in_file) { } } - if (!strncmp("-Interconnect Power Gating", line, + if (!strncmp("-Interconnect Power Gating", + line, strlen("-Interconnect Power Gating"))) { sscanf(line, "-Interconnect Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { @@ -576,7 +601,8 @@ void InputParameter::parse_cfg(const string &in_file) { } } - if (!strncmp("-Power Gating Performance Loss", line, + if (!strncmp("-Power Gating Performance Loss", + line, strlen("-Power Gating Performance Loss"))) { sscanf(line, "-Power Gating Performance Loss %lf", &(perfloss)); continue; @@ -591,8 +617,8 @@ void InputParameter::parse_cfg(const string &in_file) { } } - if (!strncmp("-Long channel devices", line, - strlen("-Long channel devices"))) { + if (!strncmp( + "-Long channel devices", line, strlen("-Long channel devices"))) { sscanf(line, "-Long channel devices %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { long_channel_device = true; @@ -601,7 +627,8 @@ void InputParameter::parse_cfg(const string &in_file) { } } - if 
(!strncmp("-Print input parameters", line, + if (!strncmp("-Print input parameters", + line, strlen("-Print input parameters"))) { sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); if (!strncmp("true", temp_var, strlen("true"))) { @@ -877,35 +904,58 @@ uca_org_t cacti_interface(const string &infile_name) { // cacti6.5's plain interface, please keep !!! uca_org_t cacti_interface( - int cache_size, int line_size, int associativity, int rw_ports, - int excl_read_ports, int excl_write_ports, int single_ended_read_ports, + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, int banks, double tech_node, // in nm - int page_sz, int burst_length, int pre_width, int output_width, - int specific_tag, int tag_width, + int page_sz, + int burst_length, + int pre_width, + int output_width, + int specific_tag, + int tag_width, int access_mode, // 0 normal, 1 seq, 2 fast int cache, // scratch ram or cache - int main_mem, int obj_func_delay, int obj_func_dynamic_power, - int obj_func_leakage_power, int obj_func_area, int obj_func_cycle_time, - int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power, - int dev_func_area, int dev_func_cycle_time, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_area, + int obj_func_cycle_time, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate int temp, int wt, // 0 - default(search across everything), 1 - global, 2 - 5% delay // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, // 0-4 int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, 
int interconnect_projection_type_in, // 0 - aggressive, 1 - normal - int wire_inside_mat_type_in, int wire_outside_mat_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, int is_nuca, // 0 - UCA, 1 - NUCA int core_count, int cache_level, // 0 - L2, 1 - L3 - int nuca_bank_count, int nuca_obj_func_delay, - int nuca_obj_func_dynamic_power, int nuca_obj_func_leakage_power, - int nuca_obj_func_area, int nuca_obj_func_cycle_time, - int nuca_dev_func_delay, int nuca_dev_func_dynamic_power, - int nuca_dev_func_leakage_power, int nuca_dev_func_area, + int nuca_bank_count, + int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, + int nuca_obj_func_leakage_power, + int nuca_obj_func_area, + int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, + int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, + int nuca_dev_func_area, int nuca_dev_func_cycle_time, int REPEATERS_IN_HTREE_SEGMENTS_in, // TODO for now only wires with // repeaters are supported @@ -1046,17 +1096,29 @@ uca_org_t cacti_interface( // McPAT's plain interface, please keep !!! 
uca_org_t cacti_interface( - int cache_size, int line_size, int associativity, int rw_ports, + int cache_size, + int line_size, + int associativity, + int rw_ports, int excl_read_ports, // para5 - int excl_write_ports, int single_ended_read_ports, int search_ports, + int excl_write_ports, + int single_ended_read_ports, + int search_ports, int banks, double tech_node, // para10 - int output_width, int specific_tag, int tag_width, int access_mode, + int output_width, + int specific_tag, + int tag_width, + int access_mode, int cache, // para15 - int main_mem, int obj_func_delay, int obj_func_dynamic_power, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, int obj_func_leakage_power, int obj_func_cycle_time, // para20 - int obj_func_area, int dev_func_delay, int dev_func_dynamic_power, + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, int dev_func_leakage_power, int dev_func_area, // para25 int dev_func_cycle_time, @@ -1066,19 +1128,27 @@ uca_org_t cacti_interface( // penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing int data_arr_ram_cell_tech_flavor_in, // para30 int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, int interconnect_projection_type_in, int wire_inside_mat_type_in, // para35 - int wire_outside_mat_type_in, int REPEATERS_IN_HTREE_SEGMENTS_in, + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, int PAGE_SIZE_BITS_in, // para40 - int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in, int force_wiretype, + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, int wiretype, int force_config, // para45 - int ndwl, int ndbl, int nspd, int ndcm, + int ndwl, + int ndbl, + int nspd, + int ndcm, int ndsam1, // para50 - int ndsam2, int ecc) { + 
int ndsam2, + int ecc) { g_ip = new InputParameter(); uca_org_t fin_res; diff --git a/src/cacti/main.cc b/src/cacti/main.cc index f50b19d..9236506 100644 --- a/src/cacti/main.cc +++ b/src/cacti/main.cc @@ -72,36 +72,113 @@ int main(int argc, char *argv[]) { result = cacti_interface(infile_name); } } else if (argc == 53) { - result = cacti_interface( - atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), - atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), - atoi(argv[9]), atof(argv[10]), atoi(argv[11]), atoi(argv[12]), - atoi(argv[13]), atoi(argv[14]), atoi(argv[15]), atoi(argv[16]), - atoi(argv[17]), atoi(argv[18]), atoi(argv[19]), atoi(argv[20]), - atoi(argv[21]), atoi(argv[22]), atoi(argv[23]), atoi(argv[24]), - atoi(argv[25]), atoi(argv[26]), atoi(argv[27]), atoi(argv[28]), - atoi(argv[29]), atoi(argv[30]), atoi(argv[31]), atoi(argv[32]), - atoi(argv[33]), atoi(argv[34]), atoi(argv[35]), atoi(argv[36]), - atoi(argv[37]), atoi(argv[38]), atoi(argv[39]), atoi(argv[40]), - atoi(argv[41]), atoi(argv[42]), atoi(argv[43]), atoi(argv[44]), - atoi(argv[45]), atoi(argv[46]), atoi(argv[47]), atoi(argv[48]), - atoi(argv[49]), atoi(argv[50]), atoi(argv[51]), atoi(argv[52])); + result = cacti_interface(atoi(argv[1]), + atoi(argv[2]), + atoi(argv[3]), + atoi(argv[4]), + atoi(argv[5]), + atoi(argv[6]), + atoi(argv[7]), + atoi(argv[8]), + atoi(argv[9]), + atof(argv[10]), + atoi(argv[11]), + atoi(argv[12]), + atoi(argv[13]), + atoi(argv[14]), + atoi(argv[15]), + atoi(argv[16]), + atoi(argv[17]), + atoi(argv[18]), + atoi(argv[19]), + atoi(argv[20]), + atoi(argv[21]), + atoi(argv[22]), + atoi(argv[23]), + atoi(argv[24]), + atoi(argv[25]), + atoi(argv[26]), + atoi(argv[27]), + atoi(argv[28]), + atoi(argv[29]), + atoi(argv[30]), + atoi(argv[31]), + atoi(argv[32]), + atoi(argv[33]), + atoi(argv[34]), + atoi(argv[35]), + atoi(argv[36]), + atoi(argv[37]), + atoi(argv[38]), + atoi(argv[39]), + atoi(argv[40]), + atoi(argv[41]), + atoi(argv[42]), + atoi(argv[43]), + 
atoi(argv[44]), + atoi(argv[45]), + atoi(argv[46]), + atoi(argv[47]), + atoi(argv[48]), + atoi(argv[49]), + atoi(argv[50]), + atoi(argv[51]), + atoi(argv[52])); } else { - result = cacti_interface( - atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), - atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), atoi(argv[8]), - atof(argv[9]), atoi(argv[10]), atoi(argv[11]), atoi(argv[12]), - atoi(argv[13]), atoi(argv[14]), atoi(argv[15]), atoi(argv[16]), - atoi(argv[17]), atoi(argv[18]), atoi(argv[19]), atoi(argv[20]), - atoi(argv[21]), atoi(argv[22]), atoi(argv[23]), atoi(argv[24]), - atoi(argv[25]), atoi(argv[26]), atoi(argv[27]), atoi(argv[28]), - atoi(argv[29]), atoi(argv[30]), atoi(argv[31]), atoi(argv[32]), - atoi(argv[33]), atoi(argv[34]), atoi(argv[35]), atoi(argv[36]), - atoi(argv[37]), atoi(argv[38]), atoi(argv[39]), atoi(argv[40]), - atoi(argv[41]), atoi(argv[42]), atoi(argv[43]), atoi(argv[44]), - atoi(argv[45]), atoi(argv[46]), atoi(argv[47]), atoi(argv[48]), - atoi(argv[49]), atoi(argv[50]), atoi(argv[51]), atoi(argv[52]), - atoi(argv[53]), atoi(argv[54])); + result = cacti_interface(atoi(argv[1]), + atoi(argv[2]), + atoi(argv[3]), + atoi(argv[4]), + atoi(argv[5]), + atoi(argv[6]), + atoi(argv[7]), + atoi(argv[8]), + atof(argv[9]), + atoi(argv[10]), + atoi(argv[11]), + atoi(argv[12]), + atoi(argv[13]), + atoi(argv[14]), + atoi(argv[15]), + atoi(argv[16]), + atoi(argv[17]), + atoi(argv[18]), + atoi(argv[19]), + atoi(argv[20]), + atoi(argv[21]), + atoi(argv[22]), + atoi(argv[23]), + atoi(argv[24]), + atoi(argv[25]), + atoi(argv[26]), + atoi(argv[27]), + atoi(argv[28]), + atoi(argv[29]), + atoi(argv[30]), + atoi(argv[31]), + atoi(argv[32]), + atoi(argv[33]), + atoi(argv[34]), + atoi(argv[35]), + atoi(argv[36]), + atoi(argv[37]), + atoi(argv[38]), + atoi(argv[39]), + atoi(argv[40]), + atoi(argv[41]), + atoi(argv[42]), + atoi(argv[43]), + atoi(argv[44]), + atoi(argv[45]), + atoi(argv[46]), + atoi(argv[47]), + atoi(argv[48]), + atoi(argv[49]), + atoi(argv[50]), + 
atoi(argv[51]), + atoi(argv[52]), + atoi(argv[53]), + atoi(argv[54])); } result.cleanup(); diff --git a/src/cacti/mat.cc b/src/cacti/mat.cc index 8012e26..f65f88d 100644 --- a/src/cacti/mat.cc +++ b/src/cacti/mat.cc @@ -140,34 +140,53 @@ Mat::Mat(const DynamicParameter &dyn_p) R_wire_sa_mux_dec_out /= 2.0; } - row_dec = - new Decoder(num_dec_signals, false, subarray.C_wl, R_wire_wl_drv_out, - false /*is_fa*/, is_dram, true, camFlag ? cam_cell : cell, - g_ip->power_gating ? true : false, subarray.num_rows); + row_dec = new Decoder(num_dec_signals, + false, + subarray.C_wl, + R_wire_wl_drv_out, + false /*is_fa*/, + is_dram, + true, + camFlag ? cam_cell : cell, + g_ip->power_gating ? true : false, + subarray.num_rows); // row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for // OOO programming if (is_fa && (!dp.is_tag)) // { // row_dec->exist = true; // } - bit_mux_dec = - new Decoder(deg_bl_muxing, // This number is 1 for FA or CAM - false, C_ld_bit_mux_dec_out, R_wire_bit_mux_dec_out, - false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, - g_ip->power_gating ? true : false); + bit_mux_dec = new Decoder(deg_bl_muxing, // This number is 1 for FA or CAM + false, + C_ld_bit_mux_dec_out, + R_wire_bit_mux_dec_out, + false /*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell, + g_ip->power_gating ? true : false); sa_mux_lev_1_dec = new Decoder(dp.deg_senseamp_muxing_non_associativity, // This number is 1 // for FA or CAM dp.number_way_select_signals_mat ? true : false, // only sa_mux_lev_1_dec needs way select signal - C_ld_sa_mux_lev_1_dec_out, R_wire_sa_mux_dec_out, - false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, + C_ld_sa_mux_lev_1_dec_out, + R_wire_sa_mux_dec_out, + false /*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell, g_ip->power_gating ? 
true : false); sa_mux_lev_2_dec = new Decoder(dp.Ndsam_lev_2, // This number is 1 for FA or CAM - false, C_ld_sa_mux_lev_2_dec_out, R_wire_sa_mux_dec_out, - false /*is_fa*/, is_dram, false, camFlag ? cam_cell : cell, + false, + C_ld_sa_mux_lev_2_dec_out, + R_wire_sa_mux_dec_out, + false /*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell, g_ip->power_gating ? true : false); double C_wire_predec_blk_out; @@ -191,22 +210,40 @@ Mat::Mat(const DynamicParameter &dyn_p) if (is_fa || pure_cam) num_dec_signals += _log2(num_subarrays_per_mat); - PredecBlk *r_predec_blk1 = new PredecBlk( - num_dec_signals, row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, - num_subarrays_per_mat, is_dram, true); - PredecBlk *r_predec_blk2 = new PredecBlk( - num_dec_signals, row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, - num_subarrays_per_mat, is_dram, false); + PredecBlk *r_predec_blk1 = new PredecBlk(num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + true); + PredecBlk *r_predec_blk2 = new PredecBlk(num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + false); PredecBlk *b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); PredecBlk *b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); PredecBlk *sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, - sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + sa_mux_lev_1_dec, + 0, + 0, + 1, + is_dram, + true); PredecBlk *sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, - sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + sa_mux_lev_1_dec, + 0, + 0, + 1, + is_dram, + false); PredecBlk *sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); PredecBlk *sa_mux_lev_2_predec_blk2 = @@ -230,8 +267,8 @@ Mat::Mat(const DynamicParameter 
&dyn_p) new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); PredecBlkDrv *sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); - way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, - dummy_way_sel_predec_blk1, is_dram); + way_sel_drv1 = new PredecBlkDrv( + dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); @@ -243,9 +280,12 @@ Mat::Mat(const DynamicParameter &dyn_p) new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); subarray_out_wire = - new Wire(Global, (g_ip->cl_vertical ? subarray.area.w : subarray.area.h), - 1, 1, inside_mat); // should be subarray.area.w; if with /2 - // means average length + new Wire(Global, + (g_ip->cl_vertical ? subarray.area.w : subarray.area.h), + 1, + 1, + inside_mat); // should be subarray.area.w; if with /2 + // means average length double driver_c_gate_load; double driver_c_wire_load; @@ -257,41 +297,50 @@ Mat::Mat(const DynamicParameter &dyn_p) // precharge p size is the same driver_c_gate_load = (subarray.num_cols_fa_cam) * - gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, - false, false); + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, + 0, + is_dram, + false, + false); driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - cam_bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + cam_bl_precharge_eq_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); if (!pure_cam) { // This is only used for fully asso not pure CAM driver_c_gate_load = (subarray.num_cols_fa_ram) * - gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, - false, false); + gate_C(2 * g_tp.w_pmos_bl_precharge + 
g_tp.w_pmos_bl_eq, + 0, + is_dram, + false, + false); driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); } } else { driver_c_gate_load = subarray.num_cols * - gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, - false, false); + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, + 0, + is_dram, + false, + false); driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); } double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); @@ -323,15 +372,16 @@ Mat::Mat(const DynamicParameter &dyn_p) if ((!is_fa) && (dp.is_tag)) { // tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * // dp.Ndsam_lev_2)) / num_do_b_mat; - h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, - subarray.area.get_w()); + h_comparators = compute_comparators_height( + dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); h_comparators *= (RWP + ERP); } // power-gating circuit bool is_footer = false; double Isat_subarray = - 2 * simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, + 2 * simplified_nmos_Isat(g_tp.sram.cell_nmos_w, + is_dram, true); // only one wordline active in a subarray // 2 means two inverters in an SRAM cell double detalV_array, deltaV_wl, deltaV_floatingBL; @@ -339,19 +389,34 @@ Mat::Mat(const DynamicParameter &dyn_p) if (!(is_fa 
|| pure_cam) && g_ip->power_gating) { // for SRAM only at this moment - c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, + c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, + PCH, + 1, + 1, + cell.h, + is_dram, true); // 1 inv c_wakeup_array += 2 * drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true) + - drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, + drain_C_(g_tp.sram.cell_nmos_w, + NCH, + 1, + 1, + cell.h, + is_dram, true); // 1 inv c_wakeup_array *= subarray.num_rows; // all the SRAM cells in a bitline is connected to // the sleep tx to provide Vcc_min detalV_array = g_tp.sram_cell.Vdd - g_tp.sram_cell.Vcc_min; - sram_sleep_tx = new Sleep_tx(g_ip->perfloss, Isat_subarray, is_footer, - c_wakeup_array, detalV_array, 1, cell); + sram_sleep_tx = new Sleep_tx(g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup_array, + detalV_array, + 1, + cell); subarray.area.set_h(subarray.area.h + sram_sleep_tx->area.h); @@ -531,10 +596,22 @@ double Mat::compute_delays(double inrisetime) { bl_precharge_eq_drv->compute_delay(0); k = ml_to_ram_wl_drv->number_gates - 1; rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); - C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, - 4 * cell.h, is_dram, false, true) + - drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, - 4 * cell.h, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], + PCH, + 1, + 1, + 4 * cell.h, + is_dram, + false, + true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], + NCH, + 1, + 1, + 4 * cell.h, + is_dram, + false, + true); C_ld = ml_to_ram_wl_drv->c_gate_load + ml_to_ram_wl_drv->c_wire_load; tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); @@ -584,10 +661,17 @@ double Mat::compute_delays(double inrisetime) { int k = row_dec->num_gates - 1; double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); // TODO: this 
4*cell.h number must be revisited - double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * cell.h, - is_dram, false, true) + - drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, - is_dram, false, true); + double C_intrinsic = + drain_C_(row_dec->w_dec_p[k], + PCH, + 1, + 1, + 4 * cell.h, + is_dram, + false, + true) + + drain_C_( + row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram, false, true); double C_ld = row_dec->C_ld_dec_out; double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; @@ -675,9 +759,10 @@ double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() { } if (dp.Ndsam_lev_2 > 1) { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / - (RWP + ERP)); // sense_amp_mux_height + height += + compute_tr_width_after_folding(g_tp.w_nmos_sa_mux, + cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / + (RWP + ERP)); // sense_amp_mux_height // height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch // * (RWP + ERP); @@ -879,14 +964,17 @@ double Mat::compute_cam_delay(double inrisetime) { // horizontally driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, - 0, is_dram, false, false); + 0, + is_dram, + false, + false); driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - sl_precharge_eq_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + sl_precharge_eq_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); // searchline data driver ; subarray.num_rows + 1 is because of the dummy row // data drv should only have gate_C not 2*gate_C since the two searchlines are @@ -895,12 +983,16 @@ double Mat::compute_cam_delay(double inrisetime) { (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); 
driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - sl_data_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + sl_data_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); sl_precharge_eq_drv->compute_delay(0); double R_bl_precharge = - tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, + tr_R_on(g_tp.w_pmos_bl_precharge, + PCH, + 1, + is_dram, + false, false); // Assuming CAM and SRAM have same Pre_eq_dr double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; double R_bl = (subarray.num_rows + 1) * r_b_metal; @@ -927,14 +1019,18 @@ double Mat::compute_cam_delay(double inrisetime) { driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - ml_precharge_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + ml_precharge_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); ml_precharge_drv->compute_delay(0); rd = tr_R_on(Wdummyn, NCH, 2, is_dram); c_intrinsic = Htagbits * - (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, + (2 * drain_C_(Wdummyn, + NCH, + 2, + 1, + g_tp.cell_h_def, is_dram) // TODO: the cell_h_def should be revisit + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) / Htagbits); // since each halve only has one precharge tx @@ -1006,13 +1102,16 @@ double Mat::compute_cam_delay(double inrisetime) { // searchline data driver There are two matchline precharge driver chains per // subarray. 
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, + driver_c_gate_load = gate_C(W_hit_miss_n, + 0, + is_dram, + false, false); // nmos of the pull down logic driver_c_wire_load = subarray.C_wl_ram; driver_r_wire_load = subarray.R_wl_ram; - ml_to_ram_wl_drv = new Driver(driver_c_gate_load, driver_c_wire_load, - driver_r_wire_load, is_dram); + ml_to_ram_wl_drv = new Driver( + driver_c_gate_load, driver_c_wire_load, driver_r_wire_load, is_dram); rd = tr_R_on(Wfanorn, NCH, 1, is_dram); c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + @@ -1073,13 +1172,18 @@ double Mat::compute_cam_delay(double inrisetime) { // leakage in one subarray double Iport = - cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, + cmos_Isub_leakage(g_tp.cam.cell_a_w, + 0, + 1, + nmos, + false, true); // TODO: how much is the idle time? just by *2? double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); - double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, - 1, inv, false, true) * - 2; + double Icell = + cmos_Isub_leakage( + g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true) * + 2; double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true) * 2; // approx XOR with Inv @@ -1120,9 +1224,10 @@ double Mat::compute_cam_delay(double inrisetime) { // ports double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, - 1, inv, false, true) * - 2; + double Ig_cell = + cmos_Ig_leakage( + g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true) * + 2; double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true) * 2; // cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; @@ -1174,7 +1279,8 @@ double Mat::width_write_driver_or_write_mux() { return width_write_driver_nmos; } -double Mat::compute_comparators_height(int tagbits, int number_ways_in_mat, +double 
Mat::compute_comparators_height(int tagbits, + int number_ways_in_mat, double subarray_mem_cell_area_width) { double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def); @@ -1226,13 +1332,17 @@ double Mat::compute_bitline_delay(double inrisetime) { // Leakage current of an SRAM cell double Iport = - cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos, false, + cmos_Isub_leakage(g_tp.sram.cell_a_w, + 0, + 1, + nmos, + false, true); // TODO: how much is the idle time? just by *2? double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos, false, true); double Icell = - cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv, - false, true) * + cmos_Isub_leakage( + g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv, false, true) * 2; // two invs per cell leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd; @@ -1271,27 +1381,43 @@ double Mat::compute_bitline_delay(double inrisetime) { gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; } - double C_drain_bit_mux = drain_C_( - g_tp.w_nmos_b_mux, NCH, 1, 0, - camFlag ? cam_cell.w : cell.w / (2 * (RWP + ERP + SCHP)), is_dram); + double C_drain_bit_mux = + drain_C_(g_tp.w_nmos_b_mux, + NCH, + 1, + 0, + camFlag ? cam_cell.w : cell.w / (2 * (RWP + ERP + SCHP)), + is_dram); double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); double C_drain_sense_amp_iso = drain_C_( - g_tp.w_iso, PCH, 1, 0, + g_tp.w_iso, + PCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + - drain_C_(g_tp.w_sense_n, NCH, 1, 0, + drain_C_(g_tp.w_sense_n, + NCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_sense_p, PCH, 1, 0, + drain_C_(g_tp.w_sense_p, + PCH, + 1, + 0, camFlag ? 
cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); double C_drain_sense_amp_mux = drain_C_( - g_tp.w_nmos_sa_mux, NCH, 1, 0, + g_tp.w_nmos_sa_mux, + NCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); @@ -1479,19 +1605,31 @@ double Mat::compute_sa_delay(double inrisetime) { // sensitive to both the output time // constant as well as the magnitude of input differential voltage. double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + - drain_C_(g_tp.w_sense_n, NCH, 1, 0, + drain_C_(g_tp.w_sense_n, + NCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_sense_p, PCH, 1, 0, + drain_C_(g_tp.w_sense_p, + PCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_iso, PCH, 1, 0, + drain_C_(g_tp.w_iso, + PCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + drain_C_(g_tp.w_nmos_sa_mux, + NCH, + 1, + 0, camFlag ? cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); @@ -1524,7 +1662,10 @@ double Mat::compute_subarray_out_drv(double inrisetime) { rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); C_ld = dp.Ndsam_lev_1 * - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + drain_C_(g_tp.w_nmos_sa_mux, + NCH, + 1, + 0, camFlag ? 
cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + @@ -1545,8 +1686,8 @@ double Mat::compute_subarray_out_drv(double inrisetime) { rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, - is_dram) + + drain_C_( + p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); tf = rd * C_ld; this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); @@ -1555,28 +1696,32 @@ double Mat::compute_subarray_out_drv(double inrisetime) { power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power_subarray_out_drv.readOp.leakage += - cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv, is_dram) * + cmos_Isub_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram) * g_tp.peri_global.Vdd; power_subarray_out_drv.readOp.power_gated_leakage += - cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv, is_dram) * + cmos_Isub_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram) * g_tp.peri_global.Vcc_min; power_subarray_out_drv.readOp.gate_leakage += - cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv) * + cmos_Ig_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv) * g_tp.peri_global.Vdd; // inverter driving drain of pass transistor of second level of sense-amp mux. rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); - C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, - is_dram) + - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, - camFlag ? 
cam_cell.w - : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / - (RWP + ERP + SCHP), - is_dram); + C_ld = + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_( + p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_nmos_sa_mux, + NCH, + 1, + 0, + camFlag ? cam_cell.w + : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / + (RWP + ERP + SCHP), + is_dram); tf = rd * C_ld; this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay_subarray_out_drv += this_delay; @@ -1584,22 +1729,25 @@ double Mat::compute_subarray_out_drv(double inrisetime) { power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power_subarray_out_drv.readOp.leakage += - cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv) * + cmos_Isub_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv) * g_tp.peri_global.Vdd; power_subarray_out_drv.readOp.power_gated_leakage += - cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv, is_dram) * + cmos_Isub_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram) * g_tp.peri_global.Vcc_min; power_subarray_out_drv.readOp.gate_leakage += - cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, - inv) * + cmos_Ig_leakage( + g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv) * g_tp.peri_global.Vdd; // delay of signal through pass-transistor to input of subarray output driver. rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); C_ld = dp.Ndsam_lev_2 * - drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + drain_C_(g_tp.w_nmos_sa_mux, + NCH, + 1, + 0, camFlag ? 
cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), @@ -1610,7 +1758,8 @@ double Mat::compute_subarray_out_drv(double inrisetime) { (subarray_out_wire->wire_length / subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), - 0.0, is_dram); + 0.0, + is_dram); tf = rd * C_ld; this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay_subarray_out_drv += this_delay; @@ -1648,9 +1797,10 @@ double Mat::compute_comparator_delay(double inrisetime) { // For each degree of associativity // there are 4 such quarter comparators - double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, - 1, inv, is_dram) * - 4 * A; + double lkgCurrent = + cmos_Isub_leakage( + g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram) * + 4 * A; double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram) * 4 * A; @@ -1664,8 +1814,8 @@ double Mat::compute_comparator_delay(double inrisetime) { nextinputtime = st2del / (1.0 - VTHCOMPINV); power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, - inv, is_dram) * + lkgCurrent += cmos_Isub_leakage( + g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram) * 4 * A; gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram) * @@ -1681,8 +1831,8 @@ double Mat::compute_comparator_delay(double inrisetime) { nextinputtime = st3del / (VTHEVALINV); power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, - inv, is_dram) * + lkgCurrent += cmos_Isub_leakage( + g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram) * 4 * A; gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram) * diff --git a/src/cacti/mat.h b/src/cacti/mat.h index aba17d7..f8d2090 100644 
--- a/src/cacti/mat.h +++ b/src/cacti/mat.h @@ -152,7 +152,8 @@ class Mat : public Component { private: double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); double width_write_driver_or_write_mux(); - double compute_comparators_height(int tagbits, int number_ways_in_mat, + double compute_comparators_height(int tagbits, + int number_ways_in_mat, double subarray_mem_cell_area_w); double compute_cam_delay(double inrisetime); double compute_bitline_delay(double inrisetime); diff --git a/src/cacti/nuca.cc b/src/cacti/nuca.cc index c9fbcd6..a706ac6 100644 --- a/src/cacti/nuca.cc +++ b/src/cacti/nuca.cc @@ -66,8 +66,17 @@ void Nuca::init_cont() { cont_stats[i /*l2 or l3*/][j /*core*/] [k /*64 or 128 or 256 link bw*/][l /* no banks*/]; assert(fscanf(cont, "%[^\n]\n", line) != EOF); - sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk, &temp[0], &temp[1], - &temp[2], &temp[3], &temp[4], &temp[5], &temp[6], &temp[7]); + sscanf(line, + "%[^:]: %d %d %d %d %d %d %d %d", + jk, + &temp[0], + &temp[1], + &temp[2], + &temp[3], + &temp[4], + &temp[5], + &temp[6], + &temp[7]); } } } @@ -429,7 +438,8 @@ void Nuca::print_nuca(nuca_org_t *fr) { printf("Optimal number of banks - %d\n", fr->bank_count); printf("Grid organization rows x columns - %d x %d\n", fr->rows, fr->columns); printf("Network frequency - %g GHz\n", (1 / fr->nuca_pda.cycle_time) * 1e3); - printf("Cache dimension (mm x mm) - %g x %g\n", fr->nuca_pda.area.h * 1e-3, + printf("Cache dimension (mm x mm) - %g x %g\n", + fr->nuca_pda.area.h * 1e-3, fr->nuca_pda.area.w * 1e-3); fr->router->print_router(); @@ -487,13 +497,16 @@ nuca_org_t *Nuca::find_optimal_nuca(list *n, list::iterator niter; for (niter = n->begin(); niter != n->end(); niter++) { - fprintf(stderr, "\n-----------------------------" - "---------------\n"); + fprintf(stderr, + "\n-----------------------------" + "---------------\n"); printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " "bank_dpower = %g \tleak = %g \tcycle = %g\n", - 
(*niter)->bank_count, (*niter)->nuca_pda.delay, - (*niter)->nuca_pda.power.readOp.dynamic, (*niter)->h_wire->wt, + (*niter)->bank_count, + (*niter)->nuca_pda.delay, + (*niter)->nuca_pda.power.readOp.dynamic, + (*niter)->h_wire->wt, (*niter)->bank_pda.power.readOp.dynamic, (*niter)->nuca_pda.power.readOp.leakage, (*niter)->nuca_pda.cycle_time); diff --git a/src/cacti/parameter.cc b/src/cacti/parameter.cc index d22e231..4c508f1 100644 --- a/src/cacti/parameter.cc +++ b/src/cacti/parameter.cc @@ -227,11 +227,16 @@ void TechnologyParameter::display(uint32_t indent) { DynamicParameter::DynamicParameter() : use_inp_params(0), cell(), is_valid(true) {} -DynamicParameter::DynamicParameter(bool is_tag_, int pure_ram_, int pure_cam_, - double Nspd_, unsigned int Ndwl_, - unsigned int Ndbl_, unsigned int Ndcm_, +DynamicParameter::DynamicParameter(bool is_tag_, + int pure_ram_, + int pure_cam_, + double Nspd_, + unsigned int Ndwl_, + unsigned int Ndbl_, + unsigned int Ndcm_, unsigned int Ndsam_lev_1_, - unsigned int Ndsam_lev_2_, bool is_main_mem_) + unsigned int Ndsam_lev_2_, + bool is_main_mem_) : is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_), Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), diff --git a/src/cacti/parameter.h b/src/cacti/parameter.h index c1066ba..dd9f4c8 100644 --- a/src/cacti/parameter.h +++ b/src/cacti/parameter.h @@ -344,9 +344,15 @@ class DynamicParameter { double dram_refresh_period; DynamicParameter(); - DynamicParameter(bool is_tag_, int pure_ram_, int pure_cam_, double Nspd_, - unsigned int Ndwl_, unsigned int Ndbl_, unsigned int Ndcm_, - unsigned int Ndsam_lev_1_, unsigned int Ndsam_lev_2_, + DynamicParameter(bool is_tag_, + int pure_ram_, + int pure_cam_, + double Nspd_, + unsigned int Ndwl_, + unsigned int Ndbl_, + unsigned int Ndcm_, + unsigned int Ndsam_lev_1_, + unsigned int Ndsam_lev_2_, bool is_main_mem_); int use_inp_params; diff --git 
a/src/cacti/powergating.cc b/src/cacti/powergating.cc index a0f62d6..3521a95 100644 --- a/src/cacti/powergating.cc +++ b/src/cacti/powergating.cc @@ -71,7 +71,9 @@ using namespace std; */ Sleep_tx::Sleep_tx(double _perf_with_sleep_tx, double _active_Isat, // of circuit block, not sleep tx - bool _is_footer, double _c_circuit_wakeup, double _V_delta, + bool _is_footer, + double _c_circuit_wakeup, + double _V_delta, int _num_sleep_tx, // double _vt_circuit, // double _vt_sleep_tx, @@ -134,8 +136,15 @@ auto Sleep_tx::compute_penalty() -> double { // no 0.5 because the half of the energy spend in entering sleep and half of // the energy will be spent in waking up. And they are pairs } else { - c_intrinsic_sleep = drain_C_(width * p_to_n_sz_ratio, PCH, 1, 1, area.h, - false, false, false, is_sleep_tx); + c_intrinsic_sleep = drain_C_(width * p_to_n_sz_ratio, + PCH, + 1, + 1, + area.h, + false, + false, + false, + is_sleep_tx); // V_delta = _V_delta; wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep) * V_delta / diff --git a/src/cacti/powergating.h b/src/cacti/powergating.h index b2df692..d77cf1b 100644 --- a/src/cacti/powergating.h +++ b/src/cacti/powergating.h @@ -38,7 +38,9 @@ class Sleep_tx : public Component { public: Sleep_tx(double _perf_with_sleep_tx, double _active_Isat, // of circuit block, not sleep tx - bool _is_footer, double _c_circuit_wakeup, double _V_delta, + bool _is_footer, + double _c_circuit_wakeup, + double _V_delta, int _num_sleep_tx, // double _vt_circuit, // double _vt_sleep_tx, diff --git a/src/cacti/router.cc b/src/cacti/router.cc index 635167d..7005f83 100644 --- a/src/cacti/router.cc +++ b/src/cacti/router.cc @@ -33,8 +33,11 @@ Router::Router(double flit_size_, double vc_buf, /* vc size = vc_buffer_size * flit_size */ - double vc_c, TechnologyParameter::DeviceType *dt, double I_, - double O_, double M_) + double vc_c, + TechnologyParameter::DeviceType *dt, + double I_, + double O_, + double M_) : flit_size(flit_size_), deviceType(dt), 
I(I_), O(O_), M(M_) { vc_buffer_size = vc_buf; vc_count = vc_c; @@ -73,7 +76,8 @@ Router::Cw3(double length) { double Router::gate_cap(double w) { return (double)gate_C(w * 1e6 /*u*/, 0); } /*Function to calculate the diffusion capacitance*/ -double Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, +double Router::diff_cap(double w, + int type /*0 for n-mos and 1 for p-mos*/, double s /*number of stacking transistors*/) { return (double)drain_C_(w * 1e6 /*u*/, type, (int)s, 1, g_tp.cell_h_def); } diff --git a/src/cacti/router.h b/src/cacti/router.h index 015424f..a97c3b4 100644 --- a/src/cacti/router.h +++ b/src/cacti/router.h @@ -50,7 +50,9 @@ class Router : public Component { double vc_buf, /* vc size = vc_buffer_size * flit_size */ double vc_count, TechnologyParameter::DeviceType *dt = &(g_tp.peri_global), - double I_ = 5, double O_ = 5, double M_ = 0.6); + double I_ = 5, + double O_ = 5, + double M_ = 0.6); ~Router(); void print_router(); @@ -67,8 +69,8 @@ class Router : public Component { double FREQUENCY; // move this to config file --TODO double Cw3(double len); double gate_cap(double w); - double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, - double stack); + double + diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack); enum Wire_type wtype; enum Wire_placement wire_placement; // corssbar diff --git a/src/cacti/subarray.cc b/src/cacti/subarray.cc index b262ec0..8085793 100644 --- a/src/cacti/subarray.cc +++ b/src/cacti/subarray.cc @@ -145,7 +145,8 @@ void Subarray::compute_C() { } else { if (!(is_fa || dp.pure_cam)) { C_wl = (gate_C_pass(g_tp.sram.cell_a_w, - (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0, false, + (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0, + false, true) * 2 + c_w_metal) * @@ -161,7 +162,8 @@ void Subarray::compute_C() { r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um; C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) / 2.0, - false, true) * + 
false, + true) * 2 + c_w_metal) * num_cols_fa_cam; @@ -173,7 +175,8 @@ void Subarray::compute_C() { r_w_metal = cell.w * g_tp.wire_local.R_per_um; C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0, - false, true) * + false, + true) * 2 + c_w_metal) * num_cols_fa_ram; diff --git a/src/cacti/technology.cc b/src/cacti/technology.cc index 5c03d9b..938660a 100644 --- a/src/cacti/technology.cc +++ b/src/cacti/technology.cc @@ -34,9 +34,12 @@ #include -double wire_resistance(double resistivity, double wire_width, - double wire_thickness, double barrier_thickness, - double dishing_thickness, double alpha_scatter) { +double wire_resistance(double resistivity, + double wire_width, + double wire_thickness, + double barrier_thickness, + double dishing_thickness, + double alpha_scatter) { double resistance; resistance = alpha_scatter * resistivity / ((wire_thickness - barrier_thickness - dishing_thickness) * @@ -44,10 +47,14 @@ double wire_resistance(double resistivity, double wire_width, return (resistance); } -double wire_capacitance(double wire_width, double wire_thickness, - double wire_spacing, double ild_thickness, - double miller_value, double horiz_dielectric_constant, - double vert_dielectric_constant, double fringe_cap) { +double wire_capacitance(double wire_width, + double wire_thickness, + double wire_spacing, + double ild_thickness, + double miller_value, + double horiz_dielectric_constant, + double vert_dielectric_constant, + double fringe_cap) { double vertical_cap, sidewall_cap, total_cap; vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness; @@ -2219,54 +2226,77 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.017; // micron dishing_thickness = 0; // micron alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance( - CU_RESISTIVITY, wire_width, wire_thickness, barrier_thickness, - dishing_thickness, alpha_scatter); // ohm/micron - 
ild_thickness[0][0] = 0.75; // micron + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); // ohm/micron + ild_thickness[0][0] = 0.75; // micron miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 2.709; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; // F/micron - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap); // F/micron. + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); // F/micron. wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 2.4; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.75; // micron miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 2.709; vert_dielectric_constant[0][1] = 3.9; fringe_cap = 0.115e-15; // F/micron - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; 
aspect_ratio[0][2] = 2.2; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 1.5; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 2.709; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; @@ -2277,35 +2307,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.017; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.75; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 3.038; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, 
+ ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.75; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 3.038; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2313,17 +2359,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 1.98; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 3.038; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, 
ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.18; wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); @@ -2338,53 +2392,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.01; // micron dishing_thickness = 0; // micron alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance( - CU_RESISTIVITY, wire_width, wire_thickness, barrier_thickness, - dishing_thickness, alpha_scatter); // ohm/micron - ild_thickness[0][0] = 0.48; // micron + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); // ohm/micron + ild_thickness[0][0] = 0.48; // micron miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 2.709; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; // F/micron - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap); // F/micron. + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); // F/micron. 
wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 2.4; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.48; // micron miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 2.709; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 2.7; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.96; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 2.709; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + 
wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; @@ -2395,35 +2472,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.008; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.48; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 3.038; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.48; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 3.038; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - 
miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2431,17 +2524,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 1.1; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 3.038; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.09; wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); @@ -2456,52 +2557,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + 
wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][0] = 0.405; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 2.303; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], fringe_cap); + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 2.7; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.405; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 2.303; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 2.8; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - 
wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.81; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 2.303; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; @@ -2512,35 +2637,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.006; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.405; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.734; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; 
wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.405; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.734; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2548,17 +2689,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 0.77; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.734; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + 
wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.065; wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); @@ -2573,52 +2722,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][0] = 0.315; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 1.958; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], fringe_cap); + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 3.0; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.315; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 1.958; 
vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.63; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 1.958; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; @@ -2629,36 +2802,52 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.004; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, 
+ barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.315; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.46; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.315; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.46; vert_dielectric_constant[1][1] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2666,17 +2855,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 
0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 0.55; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.46; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.045; wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); @@ -2691,52 +2888,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][0] = 0.21; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 1.664; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], fringe_cap); + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + 
horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 3.0; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.21; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 1.664; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.42; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 1.664; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], 
fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // Conservative projections wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; @@ -2747,35 +2968,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.003; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.21; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.214; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; aspect_ratio[1][1] = 2.0; wire_width = wire_pitch[1][1] / 2; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.21; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.214; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = 
wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2783,17 +3020,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 0.385; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.214; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.032; // micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032); // F/micron @@ -2808,52 +3053,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, 
dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][0] = 0.15; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 1.414; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], fringe_cap); + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; // semi-global wire_width = wire_pitch[0][1] / 2; aspect_ratio[0][1] = 3.0; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.15; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 1.414; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; // global aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = 
aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.3; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 1.414; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // //************************* // wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global @@ -2914,35 +3183,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.003; dishing_thickness = 0; alpha_scatter = 1.05; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.15; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 2.104; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + 
horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.15; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 2.104; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2950,17 +3235,25 @@ void init_tech_params(double technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 0.275; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.104; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], 
horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.022; // micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022); // F/micron @@ -3020,52 +3313,76 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0; dishing_thickness = 0; alpha_scatter = 1; - wire_r_per_micron[0][0] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][0] = 0.108; miller_value[0][0] = 1.5; horiz_dielectric_constant[0][0] = 1.202; vert_dielectric_constant[0][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][0], - miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], fringe_cap); + wire_c_per_micron[0][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][0], + miller_value[0][0], + horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap); wire_pitch[0][1] = 4 * g_ip->F_sz_um; // semi-global aspect_ratio[0][1] = 3.0; wire_width = wire_pitch[0][1] / 2; wire_thickness = aspect_ratio[0][1] * wire_width; wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + 
dishing_thickness, + alpha_scatter); ild_thickness[0][1] = 0.108; miller_value[0][1] = 1.5; horiz_dielectric_constant[0][1] = 1.202; vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][1], - miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], fringe_cap); + wire_c_per_micron[0][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][1], + miller_value[0][1], + horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); wire_pitch[0][2] = 8 * g_ip->F_sz_um; // global aspect_ratio[0][2] = 3.0; wire_width = wire_pitch[0][2] / 2; wire_thickness = aspect_ratio[0][2] * wire_width; wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = - wire_resistance(BULK_CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[0][2] = 0.216; miller_value[0][2] = 1.5; horiz_dielectric_constant[0][2] = 1.202; vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[0][2], - miller_value[0][2], horiz_dielectric_constant[0][2], - vert_dielectric_constant[0][2], fringe_cap); + wire_c_per_micron[0][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[0][2], + miller_value[0][2], + horiz_dielectric_constant[0][2], + vert_dielectric_constant[0][2], + fringe_cap); // //************************* // wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global @@ -3126,35 +3443,51 @@ void init_tech_params(double technology, bool is_tag) { barrier_thickness = 0.002; dishing_thickness = 0; alpha_scatter = 1.05; - wire_r_per_micron[1][0] = - wire_resistance(CU_RESISTIVITY, wire_width, 
wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][0] = 0.108; miller_value[1][0] = 1.5; horiz_dielectric_constant[1][0] = 1.998; vert_dielectric_constant[1][0] = 3.9; fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][0], - miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], fringe_cap); + wire_c_per_micron[1][0] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][0], + miller_value[1][0], + horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); wire_pitch[1][1] = 4 * g_ip->F_sz_um; wire_width = wire_pitch[1][1] / 2; aspect_ratio[1][1] = 2.0; wire_thickness = aspect_ratio[1][1] * wire_width; wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][1] = 0.108; miller_value[1][1] = 1.5; horiz_dielectric_constant[1][1] = 1.998; vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][1], - miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], fringe_cap); + wire_c_per_micron[1][1] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][1], + miller_value[1][1], + horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -3162,17 +3495,25 @@ void init_tech_params(double 
technology, bool is_tag) { wire_thickness = aspect_ratio[1][2] * wire_width; wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = - wire_resistance(CU_RESISTIVITY, wire_width, wire_thickness, - barrier_thickness, dishing_thickness, alpha_scatter); + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, + wire_width, + wire_thickness, + barrier_thickness, + dishing_thickness, + alpha_scatter); ild_thickness[1][2] = 0.198; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 1.998; vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance( - wire_width, wire_thickness, wire_spacing, ild_thickness[1][2], - miller_value[1][2], horiz_dielectric_constant[1][2], - vert_dielectric_constant[1][2], fringe_cap); + wire_c_per_micron[1][2] = + wire_capacitance(wire_width, + wire_thickness, + wire_spacing, + ild_thickness[1][2], + miller_value[1][2], + horiz_dielectric_constant[1][2], + vert_dielectric_constant[1][2], + fringe_cap); // Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.016; // micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016); // F/micron @@ -3311,10 +3652,12 @@ void init_tech_params(double technology, bool is_tag) { double tf = rd * c_load; g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); double KLOAD = 1; - c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, - g_tp.cell_h_def) + - gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); + c_load = + KLOAD * + (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_( + g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); tf = rd * c_load; g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); } diff --git a/src/cacti/uca.cc b/src/cacti/uca.cc index 4972549..c5670af 100644 --- a/src/cacti/uca.cc +++ b/src/cacti/uca.cc @@ 
-66,42 +66,106 @@ UCA::UCA(const DynamicParameter &dyn_p) num_do_b_bank *= g_ip->data_assoc; } - htree_in_add = - new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, - num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Add_htree, true); - htree_in_data = - new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, - num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_in_htree, true); - htree_out_data = - new Htree2(g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, - num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_out_htree, true); + htree_in_add = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + 0, + num_do_b_bank, + 0, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Add_htree, + true); + htree_in_data = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + 0, + num_do_b_bank, + 0, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_in_htree, + true); + htree_out_data = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + 0, + num_do_b_bank, + 0, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_out_htree, + true); } else { - htree_in_add = new Htree2( - g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, - num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Add_htree, true); - htree_in_data = new Htree2( - g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, - num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_in_htree, true); - htree_out_data = new Htree2( - g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, - num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_out_htree, true); - htree_in_search = new Htree2( - g_ip->wt, 
bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, - num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_in_htree, true); - htree_out_search = new Htree2( - g_ip->wt, bank.area.w, bank.area.h, num_addr_b_bank, num_di_b_bank, - num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir * 2, - num_banks_hor_dir * 2, Data_out_htree, true); + htree_in_add = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + num_si_b_bank, + num_do_b_bank, + num_so_b_bank, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Add_htree, + true); + htree_in_data = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + num_si_b_bank, + num_do_b_bank, + num_so_b_bank, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_in_htree, + true); + htree_out_data = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + num_si_b_bank, + num_do_b_bank, + num_so_b_bank, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_out_htree, + true); + htree_in_search = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + num_si_b_bank, + num_do_b_bank, + num_so_b_bank, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_in_htree, + true); + htree_out_search = new Htree2(g_ip->wt, + bank.area.w, + bank.area.h, + num_addr_b_bank, + num_di_b_bank, + num_si_b_bank, + num_do_b_bank, + num_so_b_bank, + num_banks_ver_dir * 2, + num_banks_hor_dir * 2, + Data_out_htree, + true); } area.w = htree_in_data->area.w; diff --git a/src/cacti/wire.cc b/src/cacti/wire.cc index 432341c..a8bafbc 100644 --- a/src/cacti/wire.cc +++ b/src/cacti/wire.cc @@ -33,8 +33,13 @@ #include "cmath" // use this constructor to calculate wire stats -Wire::Wire(enum Wire_type wire_model, double wl, int n, double w_s, double s_s, - enum Wire_placement wp, double resistivity, +Wire::Wire(enum Wire_type wire_model, + double wl, + int n, + 
double w_s, + double s_s, + enum Wire_placement wp, + double resistivity, TechnologyParameter::DeviceType *dt) : wt(wire_model), wire_length(wl * 1e-6), nsense(n), w_scale(w_s), s_scale(s_s), resistivity(resistivity), deviceType(dt) { @@ -74,8 +79,10 @@ double Wire::repeater_size_init; // value used in initialization should not be // reused in final output double Wire::repeater_spacing_init; -Wire::Wire(double w_s, double s_s, - /*bool reset_repeater_sizing,*/ enum Wire_placement wp, double resis, +Wire::Wire(double w_s, + double s_s, + /*bool reset_repeater_sizing,*/ enum Wire_placement wp, + double resis, TechnologyParameter::DeviceType *dt) { w_scale = w_s; s_scale = s_s; @@ -144,7 +151,9 @@ void Wire::calculate_wire_stats() { repeater_spacing = global.area.w; repeater_size = global.area.h; area.set_area((wire_length / repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_size, g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); } else if (wt == Global_5) { @@ -156,7 +165,9 @@ void Wire::calculate_wire_stats() { repeater_spacing = global_5.area.w; repeater_size = global_5.area.h; area.set_area((wire_length / repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_size, g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); } else if (wt == Global_10) { @@ -168,7 +179,9 @@ void Wire::calculate_wire_stats() { repeater_spacing = global_10.area.w; repeater_size = global_10.area.h; area.set_area((wire_length / repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_size, g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); } else if (wt == Global_20) { @@ -180,7 +193,9 @@ void Wire::calculate_wire_stats() { repeater_spacing = global_20.area.w; repeater_size = global_20.area.h; area.set_area((wire_length / repeater_spacing) * - 
compute_gate_area(INV, 1, min_w_pmos * repeater_size, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_size, g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); } else if (wt == Global_30) { @@ -192,7 +207,9 @@ void Wire::calculate_wire_stats() { repeater_spacing = global_30.area.w; repeater_size = global_30.area.h; area.set_area((wire_length / repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_size, g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); } @@ -239,15 +256,21 @@ double Wire::signal_fall_time() { drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(min_w_pmos, PCH, 1); - rt = horowitz(0, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, FALL) / + rt = horowitz(0, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + FALL) / (deviceType->Vdd - deviceType->Vth); timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(g_tp.min_w_nmos_, NCH, 1); - ft = horowitz(rt, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE) / + ft = horowitz(rt, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + RISE) / deviceType->Vth; return ft; } @@ -264,15 +287,21 @@ double Wire::signal_rise_time() { drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(g_tp.min_w_nmos_, NCH, 1); - rt = horowitz(0, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE) / + rt = horowitz(0, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + RISE) / deviceType->Vth; timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 
gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(min_w_pmos, PCH, 1); - ft = horowitz(rt, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, FALL) / + ft = horowitz(rt, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + FALL) / (deviceType->Vdd - deviceType->Vth); return ft; // sec } @@ -453,8 +482,11 @@ void Wire::low_swing_model() { double timeconst = res_eq * cap_eq; - delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, RISE); + delay = horowitz(inputrise, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + RISE); double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd; inputrise = @@ -471,8 +503,11 @@ void Wire::low_swing_model() { gate_C(nsize, 0); timeconst = res_eq * cap_eq; - delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, - deviceType->Vth / deviceType->Vdd, FALL); + delay += horowitz(inputrise, + timeconst, + deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, + FALL); temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd; transmitter.delay = delay; @@ -603,7 +638,9 @@ void Wire::delay_optimal_wire(/*bool reset_repeater_sizing*/) { repeater_scaling * tc; area.set_area((len / repeater_spacing_init) * - compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, + compute_gate_area(INV, + 1, + min_w_pmos * repeater_scaling, g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)); power.readOp.dynamic = @@ -611,11 +648,15 @@ void Wire::delay_optimal_wire(/*bool reset_repeater_sizing*/) { power.readOp.leakage = ((len / repeater_spacing_init) * deviceType->Vdd * cmos_Isub_leakage(g_tp.min_w_nmos_ * repeater_scaling, - beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)); + beta * g_tp.min_w_nmos_ * repeater_scaling, + 1, + inv)); power.readOp.gate_leakage = ((len / repeater_spacing_init) * deviceType->Vdd * cmos_Ig_leakage(g_tp.min_w_nmos_ * 
repeater_scaling, - beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)); + beta * g_tp.min_w_nmos_ * repeater_scaling, + 1, + inv)); } // calculate power/delay values for wires with suboptimal repeater @@ -761,12 +802,16 @@ powerDef Wire::wire_model(double space, double size, double *delay) { ptemp.readOp.leakage = ((len / repeater_spacing) * deviceType->Vdd * cmos_Isub_leakage(g_tp.min_w_nmos_ * repeater_size, - beta * g_tp.min_w_nmos_ * repeater_size, 1, inv)); + beta * g_tp.min_w_nmos_ * repeater_size, + 1, + inv)); ptemp.readOp.gate_leakage = ((len / repeater_spacing) * deviceType->Vdd * cmos_Ig_leakage(g_tp.min_w_nmos_ * repeater_size, - beta * g_tp.min_w_nmos_ * repeater_size, 1, inv)); + beta * g_tp.min_w_nmos_ * repeater_size, + 1, + inv)); return ptemp; } diff --git a/src/cacti/wire.h b/src/cacti/wire.h index 4f6df7e..0094593 100644 --- a/src/cacti/wire.h +++ b/src/cacti/wire.h @@ -43,15 +43,18 @@ class Wire : public Component { public: - Wire(enum Wire_type wire_model, double len /* in u*/, + Wire(enum Wire_type wire_model, + double len /* in u*/, int nsense = 1 /* no. 
of sense amps connected to the low-swing wire */, - double width_scaling = 1, double spacing_scaling = 1, + double width_scaling = 1, + double spacing_scaling = 1, enum Wire_placement wire_placement = outside_mat, double resistivity = CU_RESISTIVITY, TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Wire(); - Wire(double width_scaling = 1, double spacing_scaling = 1, + Wire(double width_scaling = 1, + double spacing_scaling = 1, // bool reset_repeater_sizing = true, enum Wire_placement wire_placement = outside_mat, double resistivity = CU_RESISTIVITY, diff --git a/src/core.cc b/src/core.cc index 0ba101d..83d0d27 100644 --- a/src/core.cc +++ b/src/core.cc @@ -44,9 +44,11 @@ #include //#include "globalvar.h" -InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, +InstFetchU::InstFetchU(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_) + const CoreDynParam &dyn_p_, + bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), IB(0), BTB(0), ID_inst(0), ID_operand(0), ID_misc(0), exist(exist_) { @@ -100,8 +102,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, "icache", Core_device, - coredynp.opt_local, coredynp.core_ty); + icache.caches = new ArrayST(&interface_ip, + "icache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); scktRatio = g_tp.sckt_co_eff; chip_PR_overhead = g_tp.chip_layout_overhead; macro_PR_overhead = g_tp.macro_layout_overhead; @@ -153,8 +158,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + icache.missb = 
new ArrayST(&interface_ip, + "icacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); icache.area.set_area(icache.area.get_area() + icache.missb->local_result.area); area.set_area(area.get_area() + icache.missb->local_result.area); @@ -188,8 +196,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + icache.ifb = new ArrayST(&interface_ip, + "icacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); area.set_area(area.get_area() + icache.ifb->local_result.area); // output_data_csv(icache.ifb.local_result); @@ -226,9 +237,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = - new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + icache.prefetchb = new ArrayST(&interface_ip, + "icacheprefetchBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); icache.area.set_area(icache.area.get_area() + icache.prefetchb->local_result.area); area.set_area(area.get_area() + icache.prefetchb->local_result.area); @@ -273,7 +286,10 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, + IB = new ArrayST(&interface_ip, + "InstBuffer", + Core_device, + coredynp.opt_local, coredynp.core_ty); IB->area.set_area(IB->area.get_area() + IB->local_result.area); area.set_area(area.get_area() + IB->local_result.area); @@ -337,8 
+353,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + BTB = new ArrayST(&interface_ip, + "Branch Target Buffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); area.set_area(area.get_area() + BTB->local_result.area); /// cout<<"area="<area.get_area()); } - ID_inst = new inst_decoder(is_default, &interface_ip, coredynp.opcode_length, + ID_inst = new inst_decoder(is_default, + &interface_ip, + coredynp.opcode_length, 1 /*Decoder should not know how many by itself*/, - coredynp.x86, Core_device, coredynp.core_ty); - - ID_operand = - new inst_decoder(is_default, &interface_ip, coredynp.arch_ireg_width, 1, - coredynp.x86, Core_device, coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, &interface_ip, - 8 /* Prefix field etc upto 14B*/, 1, coredynp.x86, - Core_device, coredynp.core_ty); + coredynp.x86, + Core_device, + coredynp.core_ty); + + ID_operand = new inst_decoder(is_default, + &interface_ip, + coredynp.arch_ireg_width, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); + + ID_misc = new inst_decoder(is_default, + &interface_ip, + 8 /* Prefix field etc upto 14B*/, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); // TODO: X86 decoder should decode the inst in cyclic mode under the control // of squencer. So the dynamic power should be multiplied by a few times. 
area.set_area(area.get_area() + @@ -366,9 +397,11 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, int ithCore_, coredynp.decodeW); } -BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, +BranchPredictor::BranchPredictor(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_) + const CoreDynParam &dyn_p_, + bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), globalBPT(0), localBPT(0), L1_localBPT(0), L2_localBPT(0), chooser(0), RAS(0), exist(exist_) { @@ -421,8 +454,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, - coredynp.opt_local, coredynp.core_ty); + globalBPT = new ArrayST(&interface_ip, + "Global Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); globalBPT->area.set_area(globalBPT->area.get_area() + globalBPT->local_result.area); area.set_area(area.get_area() + globalBPT->local_result.area); @@ -446,8 +482,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, - coredynp.opt_local, coredynp.core_ty); + L1_localBPT = new ArrayST(&interface_ip, + "L1 local Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); L1_localBPT->area.set_area(L1_localBPT->area.get_area() + L1_localBPT->local_result.area); area.set_area(area.get_area() + L1_localBPT->local_result.area); @@ -471,8 +510,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = 
coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, - coredynp.opt_local, coredynp.core_ty); + L2_localBPT = new ArrayST(&interface_ip, + "L2 local Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); L2_localBPT->area.set_area(L2_localBPT->area.get_area() + L2_localBPT->local_result.area); area.set_area(area.get_area() + L2_localBPT->local_result.area); @@ -496,8 +538,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, - coredynp.opt_local, coredynp.core_ty); + chooser = new ArrayST(&interface_ip, + "Predictor Chooser", + Core_device, + coredynp.opt_local, + coredynp.core_ty); chooser->area.set_area(chooser->area.get_area() + chooser->local_result.area); area.set_area(area.get_area() + chooser->local_result.area); @@ -521,17 +566,19 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, - coredynp.core_ty); + RAS = new ArrayST( + &interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); RAS->area.set_area(RAS->area.get_area() + RAS->local_result.area * coredynp.num_hthreads); area.set_area(area.get_area() + RAS->local_result.area * coredynp.num_hthreads); } -SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, +SchedulerU::SchedulerU(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_) + const CoreDynParam &dyn_p_, + bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), 
int_inst_window(0), fp_inst_window(0), ROB(0), instruction_selection(0), exist(exist_) { @@ -579,8 +626,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, - coredynp.opt_local, coredynp.core_ty); + int_inst_window = new ArrayST(&interface_ip, + "InstFetchQueue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); int_inst_window->area.set_area(int_inst_window->area.get_area() + int_inst_window->local_result.area * coredynp.num_pipelines); @@ -600,9 +650,12 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, interface_ip.assoc = 1; // reset to prevent unnecessary warning messages when init_interface instruction_selection = new selection_logic( - is_default, XML->sys.core[ithCore].instruction_window_size, + is_default, + XML->sys.core[ithCore].instruction_window_size, coredynp.peak_issueW * XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, Core_device, coredynp.core_ty); + &interface_ip, + Core_device, + coredynp.core_ty); } if (coredynp.core_ty == OOO) { @@ -666,8 +719,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, - coredynp.opt_local, coredynp.core_ty); + int_inst_window = new ArrayST(&interface_ip, + tmp_name, + Core_device, + coredynp.opt_local, + coredynp.core_ty); int_inst_window->area.set_area(int_inst_window->area.get_area() + int_inst_window->local_result.area * coredynp.num_pipelines); @@ -715,8 +771,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = coredynp.fp_issueW; interface_ip.num_se_rd_ports = 0; 
interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, - coredynp.opt_local, coredynp.core_ty); + fp_inst_window = new ArrayST(&interface_ip, + tmp_name, + Core_device, + coredynp.opt_local, + coredynp.core_ty); fp_inst_window->area.set_area(fp_inst_window->area.get_area() + fp_inst_window->local_result.area * coredynp.num_fp_pipelines); @@ -847,8 +906,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + ROB = new ArrayST(&interface_ip, + "ReorderBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); ROB->area.set_area(ROB->area.get_area() + ROB->local_result.area * coredynp.num_pipelines); area.set_area(area.get_area() + @@ -856,15 +918,21 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, int ithCore_, ROB_height = ROB->local_result.cache_ht; } - instruction_selection = new selection_logic( - is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); + instruction_selection = + new selection_logic(is_default, + XML->sys.core[ithCore].instruction_window_size, + coredynp.peak_issueW, + &interface_ip, + Core_device, + coredynp.core_ty); } } -LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, +LoadStoreU::LoadStoreU(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_) + const CoreDynParam &dyn_p_, + bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), LSQ(0), LoadQ(0), exist(exist_) { if (!exist) @@ -922,8 +990,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; 
interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, - coredynp.opt_local, coredynp.core_ty); + dcache.caches = new ArrayST(&interface_ip, + "dcache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); dcache.area.set_area(dcache.area.get_area() + dcache.caches->local_result.area); area.set_area(area.get_area() + dcache.caches->local_result.area); @@ -959,8 +1030,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + dcache.missb = new ArrayST(&interface_ip, + "dcacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); dcache.area.set_area(dcache.area.get_area() + dcache.missb->local_result.area); area.set_area(area.get_area() + dcache.missb->local_result.area); @@ -992,8 +1066,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + dcache.ifb = new ArrayST(&interface_ip, + "dcacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); dcache.area.set_area(dcache.area.get_area() + dcache.ifb->local_result.area); area.set_area(area.get_area() + dcache.ifb->local_result.area); // output_data_csv(dcache.ifb.local_result); @@ -1028,9 +1105,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = - new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, - coredynp.opt_local, coredynp.core_ty); + dcache.prefetchb = new ArrayST(&interface_ip, + "dcacheprefetchBuffer", + Core_device, + coredynp.opt_local, 
+ coredynp.core_ty); dcache.area.set_area(dcache.area.get_area() + dcache.prefetchb->local_result.area); area.set_area(area.get_area() + dcache.prefetchb->local_result.area); @@ -1064,8 +1143,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, - coredynp.opt_local, coredynp.core_ty); + dcache.wbb = new ArrayST(&interface_ip, + "dcacheWBB", + Core_device, + coredynp.opt_local, + coredynp.core_ty); dcache.area.set_area(dcache.area.get_area() + dcache.wbb->local_result.area); area.set_area(area.get_area() + dcache.wbb->local_result.area); @@ -1103,8 +1185,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, - coredynp.opt_local, coredynp.core_ty); + LSQ = new ArrayST(&interface_ip, + "Load(Store)Queue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); LSQ->area.set_area(LSQ->area.get_area() + LSQ->local_result.area); area.set_area(area.get_area() + LSQ->local_result.area); // output_data_csv(LSQ.LSQ.local_result); @@ -1134,8 +1219,11 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, - coredynp.opt_local, coredynp.core_ty); + LoadQ = new ArrayST(&interface_ip, + "LoadQueue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); area.set_area(area.get_area() + LoadQ->local_result.area); // 
output_data_csv(LoadQ.LoadQ.local_result); @@ -1146,8 +1234,10 @@ LoadStoreU::LoadStoreU(ParseXML *XML_interface, int ithCore_, area.set_area(area.get_area() * cdb_overhead); } -MemManU::MemManU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, +MemManU::MemManU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), itlb(0), dtlb(0), exist(exist_) { @@ -1196,8 +1286,8 @@ MemManU::MemManU(ParseXML *XML_interface, int ithCore_, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, - coredynp.core_ty); + itlb = new ArrayST( + &interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area); area.set_area(area.get_area() + itlb->local_result.area); // output_data_csv(itlb.tlb.local_result); @@ -1235,15 +1325,17 @@ MemManU::MemManU(ParseXML *XML_interface, int ithCore_, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, - coredynp.core_ty); + dtlb = new ArrayST( + &interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area); area.set_area(area.get_area() + dtlb->local_result.area); // output_data_csv(dtlb.tlb.local_result); } -RegFU::RegFU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, +RegFU::RegFU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) : 
XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), IRF(0), FRF(0), RFWIN(0), exist(exist_) { @@ -1281,8 +1373,11 @@ RegFU::RegFU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 2 * coredynp.peak_issueW; interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, - coredynp.opt_local, coredynp.core_ty); + IRF = new ArrayST(&interface_ip, + "Integer Register File", + Core_device, + coredynp.opt_local, + coredynp.core_ty); IRF->area.set_area(IRF->area.get_area() + IRF->local_result.area * coredynp.num_pipelines * cdb_overhead * @@ -1320,8 +1415,11 @@ RegFU::RegFU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 2 * XML->sys.core[ithCore].issue_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, - coredynp.opt_local, coredynp.core_ty); + FRF = new ArrayST(&interface_ip, + "Floating point Register File", + Core_device, + coredynp.opt_local, + coredynp.core_ty); FRF->area.set_area(FRF->area.get_area() + FRF->local_result.area * coredynp.num_fp_pipelines * cdb_overhead * @@ -1377,8 +1475,11 @@ RegFU::RegFU(ParseXML *XML_interface, int ithCore_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, - coredynp.opt_local, coredynp.core_ty); + RFWIN = new ArrayST(&interface_ip, + "RegWindow", + Core_device, + coredynp.opt_local, + coredynp.core_ty); RFWIN->area.set_area(RFWIN->area.get_area() + RFWIN->local_result.area * coredynp.num_pipelines); area.set_area(area.get_area() + @@ -1387,9 +1488,12 @@ RegFU::RegFU(ParseXML *XML_interface, int ithCore_, } } -EXECU::EXECU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, double lsq_height_, - const 
CoreDynParam &dyn_p_, bool exist_) +EXECU::EXECU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), fp_u(0), exeu(0), mul(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), @@ -1437,50 +1541,99 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, } if (coredynp.core_ty == Inorder) { - int_bypass = new interconnect( - "Int Bypass Data", Core_device, 1, 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, - 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + int_bypass = + new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32), + rfu->int_regfile_height + exeu->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect( - "Int Bypass tag", Core_device, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + lsq_height + - scheu->Iw_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->int_regfile_height + exeu->FU_height + + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + intTagBypass->area.get_area()); if (coredynp.num_muls > 0) { - int_mul_bypass = new interconnect( - "Mul Bypass Data", Core_device, 1, 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, 
coredynp.core_ty); + int_mul_bypass = + new interconnect("Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_mul_bypass->area.get_area()); - intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height + scheu->Iw_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + intTag_mul_Bypass = + new interconnect("Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->fp_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + intTag_mul_Bypass->area.get_area()); } if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect( - "FP Bypass Data", Core_device, 1, 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, false, - 1.0, coredynp.opt_local, coredynp.core_ty); + fp_bypass = + new interconnect("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + fp_u->FU_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); - fpTagBypass = new interconnect( - "FP Bypass tag", Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + - scheu->Iw_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + fpTagBypass = new interconnect("FP Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + 
rfu->fp_regfile_height + fp_u->FU_height + + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + fpTagBypass->area.get_area()); } @@ -1491,33 +1644,67 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, * windows and register files, while tag broadcast interconnects also * cover across ROB */ - int_bypass = new interconnect( - "Int Bypass Data", Core_device, 1, 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, - 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + int_bypass = new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect( - "Int Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + intTagBypass->area.get_area()); if (coredynp.num_muls > 0) { - int_mul_bypass = new interconnect( - "Mul Bypass Data", Core_device, 1, 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + int_mul_bypass = + new interconnect("Mul Bypass Data", + Core_device, + 1, + 1, + 
int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + "Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_mul_bypass->area.get_area()); bypass.area.set_area(bypass.area.get_area() + @@ -1525,16 +1712,32 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, } if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect("FP Bypass Data", Core_device, 1, 1, + fp_bypass = new interconnect("FP Bypass Data", + Core_device, + 1, + 1, int(ceil(coredynp.fp_data_width)), rfu->fp_regfile_height + fp_u->FU_height, - &interface_ip, 3, false, 1.0, - coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); fpTagBypass = new interconnect( - "FP Bypass tag", Core_device, 1, 1, coredynp.phy_freg_width, + "FP Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); bypass.area.set_area(bypass.area.get_area() + @@ -1545,33 +1748,67 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, * In RS based processor both data and tag are broadcast together, * covering functional units, lsq, nst windows, register files, and ROBs */ - 
int_bypass = new interconnect( - "Int Bypass Data", Core_device, 1, 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTagBypass = new interconnect( - "Int Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + int_bypass = new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + lsq_height + scheu->Iw_height + + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); bypass.area.set_area(bypass.area.get_area() + intTagBypass->area.get_area()); if (coredynp.num_muls > 0) { int_mul_bypass = new interconnect( - "Mul Bypass Data", Core_device, 1, 1, + "Mul Bypass Data", + Core_device, + 1, + 1, int(ceil(coredynp.int_data_width)), rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", Core_device, 1, 1, coredynp.phy_ireg_width, + "Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height 
+ scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + int_mul_bypass->area.get_area()); bypass.area.set_area(bypass.area.get_area() + @@ -1579,17 +1816,34 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, } if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect( - "FP Bypass Data", Core_device, 1, 1, - int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + fp_bypass = new interconnect("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u->FU_height + + lsq_height + scheu->fp_Iw_height + + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); fpTagBypass = new interconnect( - "FP Bypass tag", Core_device, 1, 1, coredynp.phy_freg_width, + "FP Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, 3, false, 1.0, coredynp.opt_local, coredynp.core_ty); + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); bypass.area.set_area(bypass.area.get_area() + @@ -1600,8 +1854,10 @@ EXECU::EXECU(ParseXML *XML_interface, int ithCore_, area.set_area(area.get_area() + bypass.area.get_area()); } -RENAMINGU::RENAMINGU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, +RENAMINGU::RENAMINGU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), 
coredynp(dyn_p_), iFRAT(0), fFRAT(0), iRRAT(0), fRRAT(0), ifreeL(0), @@ -1693,8 +1949,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); area.set_area(area.get_area() + iFRAT->area.get_area()); @@ -1723,8 +1982,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); area.set_area(area.get_area() + fFRAT->area.get_area()); @@ -1759,8 +2021,11 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); area.set_area(area.get_area() + iFRAT->area.get_area()); @@ -1794,8 +2059,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); area.set_area(area.get_area() + fFRAT->area.get_area()); } @@ -1833,8 +2101,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); area.set_area(area.get_area() + iRRAT->area.get_area()); @@ -1862,8 +2133,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); area.set_area(area.get_area() + fRRAT->area.get_area()); } @@ -1899,8 +2173,11 @@ used for index the RAT entry to be updated. 
// every cycle, (coredynp.decodeW -1) inst may need to send back it dest // tags, committW insts needs to update freelist buffers interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, - coredynp.opt_local, coredynp.core_ty); + ifreeL = new ArrayST(&interface_ip, + "Int Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); ifreeL->area.set_area(ifreeL->area.get_area() + ifreeL->local_result.area); area.set_area(area.get_area() + ifreeL->area.get_area()); @@ -1927,17 +2204,21 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, "FP Free List", Core_device, - coredynp.opt_local, coredynp.core_ty); + ffreeL = new ArrayST(&interface_ip, + "FP Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); ffreeL->area.set_area(ffreeL->area.get_area() + ffreeL->local_result.area); area.set_area(area.get_area() + ffreeL->area.get_area()); idcl = new dep_resource_conflict_check( - &interface_ip, coredynp, + &interface_ip, + coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, - coredynp.phy_freg_width); + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { @@ -1967,8 +2248,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iFRAT->local_result.adjust_area(); // iFRAT->local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO @@ -2002,8 +2286,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fFRAT->local_result.adjust_area(); // fFRAT->local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO @@ -2041,8 +2328,11 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); area.set_area(area.get_area() + iFRAT->area.get_area()); @@ -2075,8 +2365,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); area.set_area(area.get_area() + fFRAT->area.get_area()); } @@ -2108,8 +2401,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); area.set_area(area.get_area() + iRRAT->area.get_area()); @@ -2137,8 +2433,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, - coredynp.opt_local, coredynp.core_ty); + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); area.set_area(area.get_area() + fRRAT->area.get_area()); } @@ -2166,17 +2465,21 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, - coredynp.opt_local, coredynp.core_ty); + ifreeL = new ArrayST(&interface_ip, + "Unified Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); // ifreeL->area.set_area(ifreeL->area.get_area()+ // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); area.set_area(area.get_area() + ifreeL->area.get_area()); idcl = new dep_resource_conflict_check( - &interface_ip, coredynp, + &interface_ip, + coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, - coredynp.phy_freg_width); + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); } } if (coredynp.core_ty == Inorder && coredynp.issueW > 1) { @@ -2185,10 +2488,11 @@ used for index the RAT entry to be updated. * must. 
*/ idcl = new dep_resource_conflict_check( - &interface_ip, coredynp, + &interface_ip, + coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip, coredynp, - coredynp.phy_freg_width); + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); } } @@ -2217,8 +2521,8 @@ Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); mmu = new MemManU(XML, ithCore, &interface_ip, coredynp, exit_flag); - exu = new EXECU(XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, - exit_flag); + exu = new EXECU( + XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); if (coredynp.core_ty == OOO) { rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp); @@ -3274,11 +3578,17 @@ void RENAMINGU::computeEnergy(bool is_tdp) { if (coredynp.issueW > 1) { idcl->power_t.reset(); fdcl->power_t.reset(); - set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, - coredynp.num_hthreads, idcl->stats_t.readAc.access); + set_pppm(pppm_t, + idcl->stats_t.readAc.access, + coredynp.num_hthreads, + coredynp.num_hthreads, + idcl->stats_t.readAc.access); idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, - coredynp.num_hthreads, idcl->stats_t.readAc.access); + set_pppm(pppm_t, + fdcl->stats_t.readAc.access, + coredynp.num_hthreads, + coredynp.num_hthreads, + idcl->stats_t.readAc.access); fdcl->power_t = fdcl->power * pppm_t; } } @@ -4745,7 +5055,10 @@ void EXECU::computeEnergy(bool is_tdp) { if (is_tdp) { set_pppm( - pppm_t, 2 * coredynp.ALU_cdb_duty_cycle, 2, 2, + pppm_t, + 2 * coredynp.ALU_cdb_duty_cycle, + 2, + 2, 2 * coredynp .ALU_cdb_duty_cycle); // 2 means two 
source operands needs to be // passed for each int instruction. @@ -4753,7 +5066,10 @@ void EXECU::computeEnergy(bool is_tdp) { int_bypass->power * pppm_t; if (coredynp.num_muls > 0) { set_pppm( - pppm_t, 2 * coredynp.MUL_cdb_duty_cycle, 2, 2, + pppm_t, + 2 * coredynp.MUL_cdb_duty_cycle, + 2, + 2, 2 * coredynp .MUL_cdb_duty_cycle); // 2 means two source operands needs to // be passed for each int instruction. @@ -4763,7 +5079,10 @@ void EXECU::computeEnergy(bool is_tdp) { } if (coredynp.num_fpus > 0) { set_pppm( - pppm_t, 3 * coredynp.FPU_cdb_duty_cycle, 3, 3, + pppm_t, + 3 * coredynp.FPU_cdb_duty_cycle, + 3, + 3, 3 * coredynp .FPU_cdb_duty_cycle); // 3 means three source operands needs // to be passed for each fp instruction. @@ -4774,13 +5093,19 @@ void EXECU::computeEnergy(bool is_tdp) { power = power + rfu->power + exeu->power + bypass.power + scheu->power; } else { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_alu_accesses, + 2, + 2, XML->sys.core[ithCore].cdb_alu_accesses); bypass.rt_power = bypass.rt_power + intTagBypass->power * pppm_t; bypass.rt_power = bypass.rt_power + int_bypass->power * pppm_t; if (coredynp.num_muls > 0) { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_mul_accesses, + 2, + 2, XML->sys.core[ithCore] .cdb_mul_accesses); // 2 means two source operands needs to // be passed for each int instruction. 
@@ -4790,7 +5115,10 @@ void EXECU::computeEnergy(bool is_tdp) { } if (coredynp.num_fpus > 0) { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_fpu_accesses, + 3, + 3, XML->sys.core[ithCore].cdb_fpu_accesses); bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; @@ -4935,7 +5263,8 @@ void Core::computeEnergy(bool is_tdp) { num_units = 5.0; rnu->computeEnergy(is_tdp); set_pppm( - pppm_t, coredynp.num_pipelines / num_units, + pppm_t, + coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / @@ -5029,7 +5358,8 @@ void Core::computeEnergy(bool is_tdp) { } else { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; } - set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); @@ -5051,7 +5381,8 @@ void Core::computeEnergy(bool is_tdp) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.IFU_duty_cycle * coredynp.total_cycles; } - set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); @@ -5067,7 +5398,8 @@ void Core::computeEnergy(bool is_tdp) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.LSU_duty_cycle * coredynp.total_cycles; } - set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); @@ -5084,7 +5416,8 @@ void 
Core::computeEnergy(bool is_tdp) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.ALU_duty_cycle * coredynp.total_cycles; } - set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); @@ -5101,7 +5434,8 @@ void Core::computeEnergy(bool is_tdp) { (0.5 + 0.5 * coredynp.LSU_duty_cycle) * coredynp.total_cycles; } - set_pppm(pppm_t, coredynp.num_pipelines * rtp_pipeline_coe / num_units, + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); @@ -5748,8 +6082,11 @@ void Core::set_core_param() { ? true : false; coredynp.executionTime = XML->sys.total_cycles / coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, - coredynp.num_hthreads, 0); + set_pppm(coredynp.pppm_lkg_multhread, + 0, + coredynp.num_hthreads, + coredynp.num_hthreads, + 0); // does not care device types, since all core device types are set at sys. 
// level diff --git a/src/core.h b/src/core.h index f81cf2d..12e99b0 100644 --- a/src/core.h +++ b/src/core.h @@ -56,8 +56,10 @@ class BranchPredictor : public Component { ArrayST *RAS; bool exist; - BranchPredictor(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + BranchPredictor(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exsit = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -82,8 +84,10 @@ class InstFetchU : public Component { inst_decoder *ID_misc; bool exist; - InstFetchU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + InstFetchU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exsit = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -105,8 +109,10 @@ class SchedulerU : public Component { selection_logic *instruction_selection; bool exist; - SchedulerU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + SchedulerU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -131,8 +137,10 @@ class RENAMINGU : public Component { ArrayST *RAHT; // register alias history table Used to store GC bool exist; - RENAMINGU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + RENAMINGU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, 
bool is_tdp = true); @@ -155,8 +163,10 @@ class LoadStoreU : public Component { ArrayST *LoadQ; bool exist; - LoadStoreU(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + LoadStoreU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -175,8 +185,11 @@ class MemManU : public Component { ArrayST *dtlb; bool exist; - MemManU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_ = true); + MemManU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~MemManU(); @@ -196,8 +209,11 @@ class RegFU : public Component { ArrayST *RFWIN; bool exist; - RegFU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_ = true); + RegFU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~RegFU(); @@ -227,8 +243,12 @@ class EXECU : public Component { Component bypass; bool exist; - EXECU(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, - double lsq_height_, const CoreDynParam &dyn_p_, bool exist_ = true); + EXECU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_ = true); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~EXECU(); diff --git a/src/interconnect.cc 
b/src/interconnect.cc index b186b63..29015e3 100644 --- a/src/interconnect.cc +++ b/src/interconnect.cc @@ -37,13 +37,21 @@ #include #include -interconnect::interconnect(string name_, enum Device_ty device_ty_, - double base_w, double base_h, int data_w, double len, +interconnect::interconnect(string name_, + enum Device_ty device_ty_, + double base_w, + double base_h, + int data_w, + double len, const InputParameter *configure_interface, - int start_wiring_level_, bool pipelinable_, - double route_over_perc_, bool opt_local_, - enum Core_type core_ty_, enum Wire_type wire_model, - double width_s, double space_s, + int start_wiring_level_, + bool pipelinable_, + double route_over_perc_, + bool opt_local_, + enum Core_type core_ty_, + enum Wire_type wire_model, + double width_s, + double space_s, TechnologyParameter::DeviceType *dt) : name(name_), device_ty(device_ty_), in_rise_time(0), out_rise_time(0), base_width(base_w), base_height(base_h), data_width(data_w), diff --git a/src/interconnect.h b/src/interconnect.h index 972f449..faf173f 100644 --- a/src/interconnect.h +++ b/src/interconnect.h @@ -46,13 +46,20 @@ class interconnect : public Component { public: - interconnect(string name_, enum Device_ty device_ty_, double base_w, - double base_h, int data_w, double len, + interconnect(string name_, + enum Device_ty device_ty_, + double base_w, + double base_h, + int data_w, + double len, const InputParameter *configure_interface, - int start_wiring_level_, bool pipelinable_ = false, - double route_over_perc_ = 0.5, bool opt_local_ = true, + int start_wiring_level_, + bool pipelinable_ = false, + double route_over_perc_ = 0.5, + bool opt_local_ = true, enum Core_type core_ty_ = Inorder, - enum Wire_type wire_model = Global, double width_s = 1.0, + enum Wire_type wire_model = Global, + double width_s = 1.0, double space_s = 1.0, TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); diff --git a/src/logic.cc b/src/logic.cc index 452a43f..f782c9c 100644 --- 
a/src/logic.cc +++ b/src/logic.cc @@ -32,7 +32,8 @@ #include "logic.h" // selection_logic -selection_logic::selection_logic(bool _is_default, int win_entries_, +selection_logic::selection_logic(bool _is_default, + int win_entries_, int issue_width_, const InputParameter *configure_interface, enum Device_ty device_ty_, @@ -102,7 +103,10 @@ void selection_logic::selection_power() { // based on cost effective superscalar 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) + 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(WSelPp, PCH, 4, 1, + drain_C_(WSelPp, + PCH, + 4, + 1, g_tp.cell_h_def) + // precompute priority logic 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) + 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) + @@ -118,7 +122,9 @@ void selection_logic::selection_power() { // based on cost effective superscalar power.readOp.leakage = issue_width * num_arbiter * (cmos_Isub_leakage( - WSelPn, WSelPp, 2, + WSelPn, + WSelPp, + 2, nor) /*approximate precompute with a nor gate*/ // grant1p + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor) // grant2p + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor) // grant3p @@ -131,7 +137,9 @@ void selection_logic::selection_power() { // based on cost effective superscalar power.readOp.gate_leakage = issue_width * num_arbiter * (cmos_Ig_leakage( - WSelPn, WSelPp, 2, + WSelPn, + WSelPp, + 2, nor) /*approximate precompute with a nor gate*/ // grant1p + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor) // grant2p + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor) // grant3p @@ -144,8 +152,10 @@ void selection_logic::selection_power() { // based on cost effective superscalar } dep_resource_conflict_check::dep_resource_conflict_check( - const InputParameter *configure_interface, const CoreDynParam &dyn_p_, - int compare_bits_, bool _is_default) + const InputParameter *configure_interface, + const CoreDynParam &dyn_p_, + int compare_bits_, + bool _is_default) : l_ip(*configure_interface), coredynp(dyn_p_), 
compare_bits(compare_bits_), is_default(_is_default) { Wcompn = 25 * l_ip.F_sz_um; // this was 20.0 micron for the 0.8 micron process @@ -254,8 +264,11 @@ void dep_resource_conflict_check::leakage_feedback(double temperature) { // TODO: add inverter and transmission gate base DFF. -DFFCell::DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, - double _cell_load, const InputParameter *configure_interface) +DFFCell::DFFCell(bool _is_dram, + double _WdecNANDn, + double _WdecNANDp, + double _cell_load, + const InputParameter *configure_interface) : is_dram(_is_dram), cell_load(_cell_load), WdecNANDn(_WdecNANDn), WdecNANDp(_WdecNANDp) { // this model is based on the NAND2 based DFF. l_ip = *configure_interface; @@ -317,8 +330,10 @@ void DFFCell::compute_DFF_cell() { } Pipeline::Pipeline(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, enum Device_ty device_ty_, - bool _is_core_pipeline, bool _is_default) + const CoreDynParam &dyn_p_, + enum Device_ty device_ty_, + bool _is_core_pipeline, + bool _is_default) : l_ip(*configure_interface), coredynp(dyn_p_), device_ty(device_ty_), is_core_pipeline(_is_core_pipeline), is_default(_is_default), num_piperegs(0.0) @@ -522,7 +537,8 @@ void Pipeline::compute_stage_vector() { } } -FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, +FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, enum FU_type fu_type_) @@ -552,13 +568,16 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage( - 5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * - g_tp.peri_global.Vdd / 2; // unit W + 
gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction // in FPU usually it can have up to 20 cycles. // base_energy = coredynp.core_ty==Inorder? 0: @@ -583,12 +602,14 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * g_tp.peri_global.Vdd / 2; // base_energy = coredynp.core_ty==Inorder? @@ -612,12 +633,14 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * g_tp.peri_global.Vdd / 2; // base_energy = coredynp.core_ty==Inorder? 
@@ -652,13 +675,16 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage( - 5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * - g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction // in FPU usually it can have up to 20 cycles. base_energy = coredynp.core_ty == Inorder @@ -682,12 +708,14 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * g_tp.peri_global.Vdd / 2; base_energy = coredynp.core_ty == Inorder @@ -711,12 +739,14 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * g_tp.peri_global.Vdd / 
2; base_energy = coredynp.core_ty == Inorder @@ -931,26 +961,31 @@ void FunctionalUnit::leakage_feedback(double temperature) { 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, inv) * g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * - g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W } else if (fu_type == ALU) { area_t = 280 * 260 * 2 * num_fu * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd / 2; } else if (fu_type == MUL) { area_t = 280 * 260 * 2 * 3 * num_fu * @@ -958,12 +993,15 @@ void FunctionalUnit::leakage_feedback(double temperature) { leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(20 * g_tp.min_w_nmos_, 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, inv) * + 1, + inv) * g_tp.peri_global.Vdd / 2; // unit W gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + 20 * g_tp.min_w_nmos_ * 
pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd / 2; } else { cout << "Unknown Functional Unit Type" << endl; @@ -980,9 +1018,12 @@ void FunctionalUnit::leakage_feedback(double temperature) { power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; } -UndiffCore::UndiffCore(ParseXML *XML_interface, int ithCore_, +UndiffCore::UndiffCore(ParseXML *XML_interface, + int ithCore_, InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, bool exist_, bool embedded_) + const CoreDynParam &dyn_p_, + bool exist_, + bool embedded_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), core_ty(coredynp.core_ty), embedded(XML->sys.Embedded), pipeline_stage(coredynp.pipeline_stages), @@ -1038,18 +1079,20 @@ UndiffCore::UndiffCore(ParseXML *XML_interface, int ithCore_, // undifferentiated_core = 3*1e6; // undifferentiated_core *= // g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = - undifferentiated_core * - (core_tx_density)*cmos_Isub_leakage( - 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, - inv) * - g_tp.peri_global.Vdd; // unit W - power.readOp.gate_leakage = - undifferentiated_core * - (core_tx_density)*cmos_Ig_leakage( - 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, - inv) * - g_tp.peri_global.Vdd; + power.readOp.leakage = undifferentiated_core * + (core_tx_density)*cmos_Isub_leakage( + 5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; // unit W + power.readOp.gate_leakage = undifferentiated_core * + (core_tx_density)*cmos_Ig_leakage( + 5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); @@ -1145,8 +1188,11 @@ void UndiffCore::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { 
inst_decoder::inst_decoder(bool _is_default, const InputParameter *configure_interface, - int opcode_length_, int num_decoders_, bool x86_, - enum Device_ty device_ty_, enum Core_type core_ty_) + int opcode_length_, + int num_decoders_, + bool x86_, + enum Device_ty device_ty_, + enum Core_type core_ty_) : is_default(_is_default), opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_), device_ty(device_ty_), core_ty(core_ty_) { @@ -1192,27 +1238,36 @@ inst_decoder::inst_decoder(bool _is_default, load_nmos_width = g_tp.max_w_nmos_ / 2; load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; C_driver_load = - 1024 * gate_C(load_nmos_width + load_pmos_width, 0, + 1024 * gate_C(load_nmos_width + load_pmos_width, + 0, is_dram); // TODO: this number 1024 needs to be revisited R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; - final_dec = new Decoder(num_decoded_signals, false, C_driver_load, - R_wire_load, false /*is_fa*/, false /*is_dram*/, + final_dec = new Decoder(num_decoded_signals, + false, + C_driver_load, + R_wire_load, + false /*is_fa*/, + false /*is_dram*/, false /*wl_tr*/, // to use peri device cell); PredecBlk *predec_blk1 = - new PredecBlk(num_decoded_signals, final_dec, + new PredecBlk(num_decoded_signals, + final_dec, 0, // Assuming predec and dec are back to back 0, 1, // Each Predec only drives one final dec - false /*is_dram*/, true); + false /*is_dram*/, + true); PredecBlk *predec_blk2 = - new PredecBlk(num_decoded_signals, final_dec, + new PredecBlk(num_decoded_signals, + final_dec, 0, // Assuming predec and dec are back to back 0, 1, // Each Predec only drives one final dec - false /*is_dram*/, false); + false /*is_dram*/, + false); PredecBlkDrv *predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); PredecBlkDrv *predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); @@ -1258,10 +1313,14 @@ void inst_decoder::inst_decoder_delay_power() { outrisetime = pre_dec->compute_delays(inrisetime); dec_outrisetime 
= final_dec->compute_delays(outrisetime); - set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments, - squencer_passes * num_decoder_segments, num_decoder_segments); + set_pppm(pppm_t, + squencer_passes * num_decoder_segments, + num_decoder_segments, + squencer_passes * num_decoder_segments, + num_decoder_segments); power = power + pre_dec->power * pppm_t; - set_pppm(pppm_t, squencer_passes * num_decoder_segments, + set_pppm(pppm_t, + squencer_passes * num_decoder_segments, num_decoder_segments * num_decoded_signals, num_decoder_segments * num_decoded_signals, squencer_passes * num_decoder_segments); @@ -1277,11 +1336,15 @@ void inst_decoder::leakage_feedback(double temperature) { double pppm_t[4] = {1, 1, 1, 1}; double squencer_passes = x86 ? 2 : 1; - set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments, - squencer_passes * num_decoder_segments, num_decoder_segments); + set_pppm(pppm_t, + squencer_passes * num_decoder_segments, + num_decoder_segments, + squencer_passes * num_decoder_segments, + num_decoder_segments); power = pre_dec->power * pppm_t; - set_pppm(pppm_t, squencer_passes * num_decoder_segments, + set_pppm(pppm_t, + squencer_passes * num_decoder_segments, num_decoder_segments * num_decoded_signals, num_decoder_segments * num_decoded_signals, squencer_passes * num_decoder_segments); diff --git a/src/logic.h b/src/logic.h index ee45ec7..aea2a52 100644 --- a/src/logic.h +++ b/src/logic.h @@ -52,7 +52,9 @@ using namespace std; class selection_logic : public Component { public: selection_logic( - bool _is_default, int win_entries_, int issue_width_, + bool _is_default, + int win_entries_, + int issue_width_, const InputParameter *configure_interface, enum Device_ty device_ty_ = Core_device, enum Core_type core_ty_ = Inorder); //, const ParseXML *_XML_interface); @@ -73,7 +75,8 @@ class selection_logic : public Component { class dep_resource_conflict_check : public Component { public: 
dep_resource_conflict_check(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, int compare_bits_, + const CoreDynParam &dyn_p_, + int compare_bits_, bool _is_default = true); InputParameter l_ip; uca_org_t local_result; @@ -95,8 +98,11 @@ class dep_resource_conflict_check : public Component { class inst_decoder : public Component { public: - inst_decoder(bool _is_default, const InputParameter *configure_interface, - int opcode_length_, int num_decoders_, bool x86_, + inst_decoder(bool _is_default, + const InputParameter *configure_interface, + int opcode_length_, + int num_decoders_, + bool x86_, enum Device_ty device_ty_ = Core_device, enum Core_type core_ty_ = Inorder); inst_decoder(); @@ -125,8 +131,11 @@ class inst_decoder : public Component { class DFFCell : public Component { public: - DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, - double _cell_load, const InputParameter *configure_interface); + DFFCell(bool _is_dram, + double _WdecNANDn, + double _WdecNANDp, + double _cell_load, + const InputParameter *configure_interface); InputParameter l_ip; bool is_dram; double cell_load; @@ -150,8 +159,10 @@ class DFFCell : public Component { class Pipeline : public Component { public: Pipeline(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, enum Device_ty device_ty_ = Core_device, - bool _is_core_pipeline = true, bool _is_default = true); + const CoreDynParam &dyn_p_, + enum Device_ty device_ty_ = Core_device, + bool _is_core_pipeline = true, + bool _is_default = true); InputParameter l_ip; uca_org_t local_result; CoreDynParam coredynp; @@ -201,8 +212,10 @@ class FunctionalUnit : public Component { statsDef stats_t; powerDef power_t; - FunctionalUnit(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, + FunctionalUnit(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, enum FU_type fu_type); void 
computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -211,9 +224,12 @@ class FunctionalUnit : public Component { class UndiffCore : public Component { public: - UndiffCore(ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exist_ = true, bool embedded_ = false); + UndiffCore(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true, + bool embedded_ = false); ParseXML *XML; int ithCore; InputParameter interface_ip; diff --git a/src/main.cc b/src/main.cc index 100f667..c23fac7 100644 --- a/src/main.cc +++ b/src/main.cc @@ -31,6 +31,7 @@ #include "XML_Parse.h" #include "globalvar.h" #include "io.h" +#include "options.h" #include "processor.h" #include "version.h" #include "xmlParser.h" @@ -42,55 +43,21 @@ using namespace std; void print_usage(char *argv0); int main(int argc, char *argv[]) { - char *fb; - bool infile_specified = false; - int plevel = 2; - opt_for_clk = true; - // cout.precision(10); - if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help")) { - print_usage(argv[0]); - } - - for (int32_t i = 0; i < argc; i++) { - if (argv[i] == string("-infile")) { - infile_specified = true; - i++; - fb = argv[i]; - } - - if (argv[i] == string("-print_level")) { - i++; - plevel = atoi(argv[i]); - } + mcpat::Options opt; - if (argv[i] == string("-opt_for_clk")) { - i++; - opt_for_clk = (bool)atoi(argv[i]); - } - } - if (infile_specified == false) { - print_usage(argv[0]); + if (!opt.parse(argc, argv)) { + return 1; } + opt_for_clk = opt.opt_for_clk; cout << "McPAT (version " << VER_MAJOR << "." 
<< VER_MINOR << " of " << VER_UPDATE << ") is computing the target processor...\n " << endl; // parse XML-based interface ParseXML *p1 = new ParseXML(); - p1->parse(fb); + p1->parse(opt.input_xml); Processor proc(p1); - proc.displayEnergy(2, plevel); + proc.displayEnergy(2, opt.print_level); delete p1; return 0; } - -void print_usage(char *argv0) { - cerr << "How to use McPAT:" << endl; - cerr << " mcpat -infile -print_level < level of details " - "0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for " - "target clock rate)>" - << endl; - // cerr << " Note:default print level is at processor level, please - // increase it to see the details" << endl; - exit(1); -} diff --git a/src/memoryctrl.cc b/src/memoryctrl.cc index b312474..e5d6205 100644 --- a/src/memoryctrl.cc +++ b/src/memoryctrl.cc @@ -73,7 +73,8 @@ * */ -MCBackend::MCBackend(InputParameter *interface_ip_, const MCParam &mcp_, +MCBackend::MCBackend(InputParameter *interface_ip_, + const MCParam &mcp_, enum MemoryCtrl_type mc_type_) : l_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_) { @@ -115,12 +116,16 @@ void MCBackend::compute() { power_t.readOp.leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W power_t.readOp.gate_leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W } else { @@ -158,12 +163,16 @@ void MCBackend::compute() { power_t.readOp.leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W 
power_t.readOp.gate_leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W power_t.readOp.dynamic *= 1.2; power_t.readOp.leakage *= 1.2; @@ -213,7 +222,8 @@ void MCBackend::computeEnergy(bool is_tdp) { } } -MCPHY::MCPHY(InputParameter *interface_ip_, const MCParam &mcp_, +MCPHY::MCPHY(InputParameter *interface_ip_, + const MCParam &mcp_, enum MemoryCtrl_type mc_type_) : l_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_) { @@ -252,12 +262,16 @@ void MCPHY::compute() { power_t.readOp.leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W power_t.readOp.gate_leakage = area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, 1, inv) * + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * g_tp.peri_global.Vdd; // unit W } else { @@ -350,8 +364,10 @@ void MCPHY::computeEnergy(bool is_tdp) { } } -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, InputParameter *interface_ip_, - const MCParam &mcp_, enum MemoryCtrl_type mc_type_) +MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_) : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_), MC_arb(0), frontendBuffer(0), readBuffer(0), writeBuffer(0) { /* All computations are for a single MC @@ -404,9 +420,11 @@ MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, InputParameter *interface_ip_, // selection and arbitration logic interface_ip.assoc = 1; // reset to prevent unnecessary warning messages when init_interface - MC_arb = - new 
selection_logic(is_default, XML->sys.mc.req_window_size_per_channel, - 1, &interface_ip, Uncore_device); + MC_arb = new selection_logic(is_default, + XML->sys.mc.req_window_size_per_channel, + 1, + &interface_ip, + Uncore_device); // read buffers. data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation diff --git a/src/memoryctrl.h b/src/memoryctrl.h index 1c5507b..e942ead 100644 --- a/src/memoryctrl.h +++ b/src/memoryctrl.h @@ -52,7 +52,8 @@ class MCBackend : public Component { statsDef rtp_stats; statsDef stats_t; powerDef power_t; - MCBackend(InputParameter *interface_ip_, const MCParam &mcp_, + MCBackend(InputParameter *interface_ip_, + const MCParam &mcp_, enum MemoryCtrl_type mc_type_); void compute(); void computeEnergy(bool is_tdp = true); @@ -70,7 +71,8 @@ class MCPHY : public Component { statsDef rtp_stats; statsDef stats_t; powerDef power_t; - MCPHY(InputParameter *interface_ip_, const MCParam &mcp_, + MCPHY(InputParameter *interface_ip_, + const MCParam &mcp_, enum MemoryCtrl_type mc_type_); void compute(); void computeEnergy(bool is_tdp = true); @@ -89,8 +91,10 @@ class MCFrontEnd : public Component { ArrayST *readBuffer; ArrayST *writeBuffer; - MCFrontEnd(ParseXML *XML_interface, InputParameter *interface_ip_, - const MCParam &mcp_, enum MemoryCtrl_type mc_type_); + MCFrontEnd(ParseXML *XML_interface, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~MCFrontEnd(); @@ -108,7 +112,8 @@ class MemoryController : public Component { Pipeline *pipeLogic; // clock_network clockNetwork; - MemoryController(ParseXML *XML_interface, InputParameter *interface_ip_, + MemoryController(ParseXML *XML_interface, + InputParameter *interface_ip_, enum MemoryCtrl_type mc_type_); void set_mc_param(); void computeEnergy(bool is_tdp = true); diff --git a/src/noc.cc b/src/noc.cc index 
4fed6ad..a58ab8d 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -43,8 +43,11 @@ #include #include -NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, - double M_traffic_pattern_, double link_len_) +NoC::NoC(ParseXML *XML_interface, + int ithNoC_, + InputParameter *interface_ip_, + double M_traffic_pattern_, + double link_len_) : XML(XML_interface), ithNoC(ithNoC_), interface_ip(*interface_ip_), router(0), link_bus(0), link_bus_exist(false), router_exist(false), M_traffic_pattern(M_traffic_pattern_) { @@ -85,11 +88,14 @@ NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, } void NoC::init_router() { - router = new Router( - nocdynp.flit_size, - nocdynp.virtual_channel_per_port * nocdynp.input_buffer_entries_per_vc, - nocdynp.virtual_channel_per_port, &(g_tp.peri_global), - nocdynp.input_ports, nocdynp.output_ports, M_traffic_pattern); + router = new Router(nocdynp.flit_size, + nocdynp.virtual_channel_per_port * + nocdynp.input_buffer_entries_per_vc, + nocdynp.virtual_channel_per_port, + &(g_tp.peri_global), + nocdynp.input_ports, + nocdynp.output_ports, + M_traffic_pattern); // router->print_router(); area.set_area(area.get_area() + router->area.get_area() * nocdynp.total_nodes); @@ -160,8 +166,15 @@ void NoC ::init_link_bus(double link_len_) { if (nocdynp.total_nodes > 1) link_len /= 2; // All links are shared by neighbors - link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size, - link_len, &interface_ip, 3, true /*pipelinable*/, + link_bus = new interconnect(name, + Uncore_device, + 1, + 1, + nocdynp.flit_size, + link_len, + &interface_ip, + 3, + true /*pipelinable*/, nocdynp.route_over_perc); link_bus_tot_per_Router.area.set_area( @@ -183,25 +196,35 @@ void NoC::computeEnergy(bool is_tdp) { if (router_exist) { set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern router->power = router->power * pppm_t; - set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, - nocdynp.total_nodes, 
nocdynp.total_nodes); + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); power = power + router->power * pppm_t; } if (link_bus_exist) { if (nocdynp.type) - set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), - nocdynp.global_linked_ports, nocdynp.global_linked_ports, + set_pppm(pppm_t, + 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), + nocdynp.global_linked_ports, + nocdynp.global_linked_ports, nocdynp.global_linked_ports); // reset traffic pattern; local port do not have router links else - set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports), - nocdynp.global_linked_ports, nocdynp.global_linked_ports, + set_pppm(pppm_t, + 1 * M_traffic_pattern * M * (nocdynp.min_ports), + nocdynp.global_linked_ports, + nocdynp.global_linked_ports, nocdynp.global_linked_ports); // reset traffic pattern link_bus_tot_per_Router.power = link_bus->power * pppm_t; - set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, - nocdynp.total_nodes, nocdynp.total_nodes); + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); power = power + link_bus_tot_per_Router.power * pppm_t; } } else { diff --git a/src/noc.h b/src/noc.h index bacb0cd..27af3f4 100644 --- a/src/noc.h +++ b/src/noc.h @@ -60,15 +60,19 @@ class NoC : public Component { bool router_exist; string name, link_name; double M_traffic_pattern; - NoC(ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, - double M_traffic_pattern_ = 0.6, double link_len_ = 0); + NoC(ParseXML *XML_interface, + int ithNoC_, + InputParameter *interface_ip_, + double M_traffic_pattern_ = 0.6, + double link_len_ = 0); void set_noc_param(); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void init_link_bus(double link_len_); void init_router(); void computeEnergy_link_bus(bool is_tdp = true); - void 
displayEnergy_link_bus(uint32_t indent = 0, int plevel = 100, + void displayEnergy_link_bus(uint32_t indent = 0, + int plevel = 100, bool is_tdp = true); ~NoC(); }; diff --git a/src/options.cc b/src/options.cc new file mode 100644 index 0000000..a8ca396 --- /dev/null +++ b/src/options.cc @@ -0,0 +1,86 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * + * Author: + * Andrew Smith + ***************************************************************************/ +#include "options.h" + +#include +#include +#include + +bool mcpat::Options::parse(int argc, char **argv) { + // clang-format off + po::options_description desc("General Options"); + desc.add_options() + ("help,h", "Display help message") + ; + + po::options_description io("IO Options"); + io.add_options() + ("infile,i", po::value(&input_xml), "Input XML File") + ("print_level,p", po::value(&print_level), "How detailed to print device tree; [1,5] being most detailed"); + ; + + po::options_description serialization("Serialization Options"); + serialization.add_options() + ("serial_path", po::value(&serialization_path), "Path/to/serialization") + ("serial_create", po::value(&serialization_create)->default_value(true), "Create A Serialization Checkpoint") + ("serial_restore", po::value(&serialization_restore)->default_value(true), "Restore from a Serialization Checkpoint") + ; + + po::options_description optimization("Optimization Options"); + optimization.add_options() + ("opt_for_clk,o", po::value(&opt_for_clk)->default_value(true), "0: optimize for ED^2P only; 1: optimzed for target clock rate") + ; + // clang-format on + + po::options_description all_options; + all_options.add(desc); + all_options.add(io); + all_options.add(serialization); + all_options.add(optimization); + + po::variables_map vm; + 
po::store(po::parse_command_line(argc, argv, all_options), vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << all_options << "\n"; + return false; + } + if (input_xml == "") { + std::cerr << "Must specify an Input XML File; \"./mcpat --help\" for more " + "options\n"; + return false; + } + return true; +} diff --git a/src/options.h b/src/options.h new file mode 100644 index 0000000..486fd20 --- /dev/null +++ b/src/options.h @@ -0,0 +1,66 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * + * Author: + * Andrew Smith + ***************************************************************************/ + +#ifndef __OPTIONS_H__ +#define __OPTIONS_H__ + +#include +#include +#include + +namespace mcpat { + +namespace po = boost::program_options; + +class Options { +public: + // IO Options + std::string input_xml = ""; + int print_level = 2; + + // Optimization Options + bool opt_for_clk = true; + + // Serialization Options + std::string serialization_path = ""; + bool serialization_create = false; + bool serialization_restore = false; + + // Get Options from Command Line + bool parse(int argc, char **argv); +}; // class Options + +} // namespace mcpat + +#endif // __OPTIONS_H__ diff --git a/src/processor.cc b/src/processor.cc index cf9ba5a..d72221b 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -104,11 +104,17 @@ Processor::Processor(ParseXML *XML_interface) if (procdynp.homoCore) { core.area.set_area(core.area.get_area() + cores[i]->area.get_area() * procdynp.numCore); - set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, - procdynp.numCore, procdynp.numCore); + set_pppm(pppm_t, + cores[i]->clockRate * procdynp.numCore, + procdynp.numCore, + procdynp.numCore, + procdynp.numCore); core.power = core.power + cores[i]->power * pppm_t; - set_pppm(pppm_t, 1 / cores[i]->executionTime, procdynp.numCore, - procdynp.numCore, procdynp.numCore); + set_pppm(pppm_t, + 1 / cores[i]->executionTime, + 
procdynp.numCore, + procdynp.numCore, + procdynp.numCore); core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; area.set_area(area.get_area() + core.area.get_area()); // placement and routing overhead is @@ -143,11 +149,17 @@ Processor::Processor(ParseXML *XML_interface) if (procdynp.homoL2) { l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area() * procdynp.numL2); - set_pppm(pppm_t, l2array[i]->cachep.clockRate * procdynp.numL2, - procdynp.numL2, procdynp.numL2, procdynp.numL2); + set_pppm(pppm_t, + l2array[i]->cachep.clockRate * procdynp.numL2, + procdynp.numL2, + procdynp.numL2, + procdynp.numL2); l2.power = l2.power + l2array[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, procdynp.numL2, - procdynp.numL2, procdynp.numL2); + set_pppm(pppm_t, + 1 / l2array[i]->cachep.executionTime, + procdynp.numL2, + procdynp.numL2, + procdynp.numL2); l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; area.set_area( area.get_area() + @@ -185,11 +197,17 @@ Processor::Processor(ParseXML *XML_interface) if (procdynp.homoL3) { l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area() * procdynp.numL3); - set_pppm(pppm_t, l3array[i]->cachep.clockRate * procdynp.numL3, - procdynp.numL3, procdynp.numL3, procdynp.numL3); + set_pppm(pppm_t, + l3array[i]->cachep.clockRate * procdynp.numL3, + procdynp.numL3, + procdynp.numL3, + procdynp.numL3); l3.power = l3.power + l3array[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, procdynp.numL3, - procdynp.numL3, procdynp.numL3); + set_pppm(pppm_t, + 1 / l3array[i]->cachep.executionTime, + procdynp.numL3, + procdynp.numL3, + procdynp.numL3); l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; area.set_area(area.get_area() + l3.area.get_area()); // placement and routing overhead is @@ -222,11 +240,17 @@ Processor::Processor(ParseXML *XML_interface) if (procdynp.homoL1Dir) { l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area() * 
procdynp.numL1Dir); - set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, - procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); + set_pppm(pppm_t, + l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, + procdynp.numL1Dir, + procdynp.numL1Dir, + procdynp.numL1Dir); l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, - procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); + set_pppm(pppm_t, + 1 / l1dirarray[i]->cachep.executionTime, + procdynp.numL1Dir, + procdynp.numL1Dir, + procdynp.numL1Dir); l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; area.set_area( area.get_area() + @@ -257,11 +281,17 @@ Processor::Processor(ParseXML *XML_interface) if (procdynp.homoL2Dir) { l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area() * procdynp.numL2Dir); - set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, - procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); + set_pppm(pppm_t, + l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, + procdynp.numL2Dir, + procdynp.numL2Dir, + procdynp.numL2Dir); l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, - procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); + set_pppm(pppm_t, + 1 / l2dirarray[i]->cachep.executionTime, + procdynp.numL2Dir, + procdynp.numL2Dir, + procdynp.numL2Dir); l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; area.set_area( area.get_area() + @@ -292,13 +322,18 @@ Processor::Processor(ParseXML *XML_interface) mc->area.get_area() * XML->sys.mc.number_mcs); area.set_area(area.get_area() + mc->area.get_area() * XML->sys.mc.number_mcs); - set_pppm(pppm_t, XML->sys.mc.number_mcs * mc->mcp.clockRate, - XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, + set_pppm(pppm_t, + XML->sys.mc.number_mcs * mc->mcp.clockRate, + XML->sys.mc.number_mcs, + XML->sys.mc.number_mcs, 
XML->sys.mc.number_mcs); mcs.power = mc->power * pppm_t; power = power + mcs.power; - set_pppm(pppm_t, 1 / mc->mcp.executionTime, XML->sys.mc.number_mcs, - XML->sys.mc.number_mcs, XML->sys.mc.number_mcs); + set_pppm(pppm_t, + 1 / mc->mcp.executionTime, + XML->sys.mc.number_mcs, + XML->sys.mc.number_mcs, + XML->sys.mc.number_mcs); mcs.rt_power = mc->rt_power * pppm_t; rt_power = rt_power + mcs.rt_power; } @@ -329,13 +364,17 @@ Processor::Processor(ParseXML *XML_interface) niu->area.get_area() * XML->sys.niu.number_units); area.set_area(area.get_area() + niu->area.get_area() * XML->sys.niu.number_units); - set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, - XML->sys.niu.number_units, XML->sys.niu.number_units, + set_pppm(pppm_t, + XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units, + XML->sys.niu.number_units, XML->sys.niu.number_units); nius.power = niu->power * pppm_t; power = power + nius.power; - set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, - XML->sys.niu.number_units, XML->sys.niu.number_units, + set_pppm(pppm_t, + XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units, + XML->sys.niu.number_units, XML->sys.niu.number_units); nius.rt_power = niu->rt_power * pppm_t; rt_power = rt_power + nius.rt_power; @@ -349,13 +388,17 @@ Processor::Processor(ParseXML *XML_interface) pcie->area.get_area() * XML->sys.pcie.number_units); area.set_area(area.get_area() + pcie->area.get_area() * XML->sys.pcie.number_units); - set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, - XML->sys.pcie.number_units, XML->sys.pcie.number_units, + set_pppm(pppm_t, + XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units, + XML->sys.pcie.number_units, XML->sys.pcie.number_units); pcies.power = pcie->power * pppm_t; power = power + pcies.power; - set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, - XML->sys.pcie.number_units, XML->sys.pcie.number_units, 
+ set_pppm(pppm_t, + XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units, + XML->sys.pcie.number_units, XML->sys.pcie.number_units); pcies.rt_power = pcie->rt_power * pppm_t; rt_power = rt_power + pcies.rt_power; @@ -375,7 +418,10 @@ Processor::Processor(ParseXML *XML_interface) } } else { // Bus based interconnect nocs.push_back( - new NoC(XML, i, &interface_ip, 1, + new NoC(XML, + i, + &interface_ip, + 1, sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + @@ -420,11 +466,17 @@ Processor::Processor(ParseXML *XML_interface) nocs[i]->computeEnergy(); nocs[i]->computeEnergy(false); if (procdynp.homoNOC) { - set_pppm(pppm_t, procdynp.numNOC * nocs[i]->nocdynp.clockRate, - procdynp.numNOC, procdynp.numNOC, procdynp.numNOC); + set_pppm(pppm_t, + procdynp.numNOC * nocs[i]->nocdynp.clockRate, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); noc.power = noc.power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, procdynp.numNOC, - procdynp.numNOC, procdynp.numNOC); + set_pppm(pppm_t, + 1 / nocs[i]->nocdynp.executionTime, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; power = power + noc.power; rt_power = rt_power + noc.rt_power; diff --git a/src/sharedcache.cc b/src/sharedcache.cc index deb9f60..038bb94 100644 --- a/src/sharedcache.cc +++ b/src/sharedcache.cc @@ -46,7 +46,8 @@ #include #include -SharedCache::SharedCache(ParseXML *XML_interface, int ithCache_, +SharedCache::SharedCache(ParseXML *XML_interface, + int ithCache_, InputParameter *interface_ip_, enum cache_level cacheL_) : XML(XML_interface), ithCache(ithCache_), interface_ip(*interface_ip_), @@ -166,8 +167,8 @@ SharedCache::SharedCache(ParseXML *XML_interface, int ithCache_, interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 1; - unicache.missb = new 
ArrayST(&interface_ip, cachep.name + "MissB", device_t, - true, core_t); + unicache.missb = new ArrayST( + &interface_ip, cachep.name + "MissB", device_t, true, core_t); unicache.area.set_area(unicache.area.get_area() + unicache.missb->local_result.area); area.set_area(area.get_area() + unicache.missb->local_result.area); @@ -192,8 +193,8 @@ SharedCache::SharedCache(ParseXML *XML_interface, int ithCache_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, - true, core_t); + unicache.ifb = new ArrayST( + &interface_ip, cachep.name + "FillB", device_t, true, core_t); unicache.area.set_area(unicache.area.get_area() + unicache.ifb->local_result.area); area.set_area(area.get_area() + unicache.ifb->local_result.area); @@ -221,8 +222,8 @@ SharedCache::SharedCache(ParseXML *XML_interface, int ithCache_, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", - device_t, true, core_t); + unicache.prefetchb = new ArrayST( + &interface_ip, cachep.name + "PrefetchB", device_t, true, core_t); unicache.area.set_area(unicache.area.get_area() + unicache.prefetchb->local_result.area); area.set_area(area.get_area() + unicache.prefetchb->local_result.area); diff --git a/src/sharedcache.h b/src/sharedcache.h index b288326..6dc3648 100644 --- a/src/sharedcache.h +++ b/src/sharedcache.h @@ -59,8 +59,10 @@ class SharedCache : public Component { double scktRatio, executionTime; // Component L2Tot, cc, cc1, ccTot; - SharedCache(ParseXML *XML_interface, int ithCache_, - InputParameter *interface_ip_, enum cache_level cacheL_ = L2); + SharedCache(ParseXML *XML_interface, + int ithCache_, + InputParameter *interface_ip_, + enum cache_level cacheL_ = L2); void set_cache_param(); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, 
bool is_tdp = true); diff --git a/src/xmlParser.cc b/src/xmlParser.cc index beca68c..432cd98 100644 --- a/src/xmlParser.cc +++ b/src/xmlParser.cc @@ -140,10 +140,12 @@ typedef struct { int l; XMLCHAR c; } XMLCharacterEntity; -static XMLCharacterEntity XMLEntities[] = { - {_CXML("&"), 5, _CXML('&')}, {_CXML("<"), 4, _CXML('<')}, - {_CXML(">"), 4, _CXML('>')}, {_CXML("""), 6, _CXML('\"')}, - {_CXML("'"), 6, _CXML('\'')}, {NULL, 0, '\0'}}; +static XMLCharacterEntity XMLEntities[] = {{_CXML("&"), 5, _CXML('&')}, + {_CXML("<"), 4, _CXML('<')}, + {_CXML(">"), 4, _CXML('>')}, + {_CXML("""), 6, _CXML('\"')}, + {_CXML("'"), 6, _CXML('\'')}, + {NULL, 0, '\0'}}; // When rendering the XMLNode to a string (using the "createXMLString" // function), you can ask for a beautiful formatting. This formatting is using @@ -570,8 +572,10 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) { if (f) { char bb[205]; int l = (int)fread(bb, 1, 200, f); - setGlobalOptions(guessCharEncoding(bb, l), guessWideCharChars, - dropWhiteSpace, removeCommentsInMiddleOfText); + setGlobalOptions(guessCharEncoding(bb, l), + guessWideCharChars, + dropWhiteSpace, + removeCommentsInMiddleOfText); fclose(f); } @@ -598,14 +602,19 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) { "%i.\n%s%s%s" #endif , - filename, XMLNode::getError(pResults.error), pResults.nLine, - pResults.nColumn, s1, s2, s3); + filename, + XMLNode::getError(pResults.error), + pResults.nLine, + pResults.nColumn, + s1, + s2, + s3); // display message #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && \ !defined(_XMLPARSER_NO_MESSAGEBOX_) - MessageBoxA(NULL, message, "XML Parsing error", - MB_OK | MB_ICONERROR | MB_TOPMOST); + MessageBoxA( + NULL, message, "XML Parsing error", MB_OK | MB_ICONERROR | MB_TOPMOST); #else printf("%s", message); #endif @@ -780,7 +789,8 @@ typedef enum Attrib { eAttribName = 0, eAttribEquals, eAttribValue } Attrib; // inside a tag typedef enum Status { eInsideTag = 0, eOutsideTag } 
Status; -XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, +XMLError XMLNode::writeToFile(XMLCSTR filename, + const char *encoding, char nFormat) const { if (!d) return eXMLErrorNone; @@ -794,7 +804,9 @@ XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, if ((!isDeclaration()) && ((d->lpszName) || (!getChildNode().isDeclaration()))) { if (!fwrite(L"\n", - sizeof(wchar_t) * 40, 1, f)) + sizeof(wchar_t) * 40, + 1, + f)) return eXMLErrorCannotWriteFile; } #else @@ -1104,8 +1116,8 @@ static inline XMLCHAR getNextChar(XML *pXML) { // Find the next token in a string. // pcbToken contains the number of characters that have been read. -static NextToken GetNextToken(XML *pXML, int *pcbToken, - enum XMLTokenTypeTag *pType) { +static NextToken +GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) { NextToken result; XMLCHAR ch; XMLCHAR chTemp; @@ -1317,8 +1329,8 @@ static inline void myFree(void *p) { if (p) free(p); } -static inline void *myRealloc(void *p, int newsize, int memInc, - int sizeofElem) { +static inline void * +myRealloc(void *p, int newsize, int memInc, int sizeofElem) { if (p == NULL) { if (memInc) return malloc(memInc * sizeofElem); @@ -1335,8 +1347,8 @@ static inline void *myRealloc(void *p, int newsize, int memInc, } // private: -XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, - XMLElementType xxtype) { +XMLElementPosition +XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) { if (index < 0) return -1; int i = 0, j = (int)((index << 2) + xxtype), *o = d->pOrder; @@ -1361,8 +1373,12 @@ int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) { return i; } -void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p, - int size, XMLElementType xtype) { +void *XMLNode::addToOrder(int memoryIncrease, + int *_pos, + int nc, + void *p, + int size, + XMLElementType xtype) { // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") 
// out: *_pos is the index inside p p = myRealloc(p, (nc + 1), memoryIncrease, size); @@ -1394,19 +1410,22 @@ void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p, o[i] += 4; *_pos = pos = o[pos] >> 2; - memmove(((char *)p) + (pos + 1) * size, ((char *)p) + pos * size, + memmove(((char *)p) + (pos + 1) * size, + ((char *)p) + pos * size, (nc - pos) * size); return p; } // Add a child node to the given element. -XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, - char isDeclaration, int pos) { +XMLNode XMLNode::addChild_priv(int memoryIncrease, + XMLSTR lpszName, + char isDeclaration, + int pos) { if (!lpszName) return emptyXMLNode; - d->pChild = (XMLNode *)addToOrder(memoryIncrease, &pos, d->nChild, d->pChild, - sizeof(XMLNode), eNodeChild); + d->pChild = (XMLNode *)addToOrder( + memoryIncrease, &pos, d->nChild, d->pChild, sizeof(XMLNode), eNodeChild); d->pChild[pos].d = NULL; d->pChild[pos] = XMLNode(d, lpszName, isDeclaration); d->nChild++; @@ -1414,7 +1433,8 @@ XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, } // Add an attribute to an element. -XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, XMLSTR lpszName, +XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, + XMLSTR lpszName, XMLSTR lpszValuev) { if (!lpszName) return &emptyXMLAttribute; @@ -1441,24 +1461,27 @@ XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) { myFree(lpszValue); return NULL; } - d->pText = (XMLCSTR *)addToOrder(memoryIncrease, &pos, d->nText, d->pText, - sizeof(XMLSTR), eNodeText); + d->pText = (XMLCSTR *)addToOrder( + memoryIncrease, &pos, d->nText, d->pText, sizeof(XMLSTR), eNodeText); d->pText[pos] = lpszValue; d->nText++; return lpszValue; } // Add clear (unformatted) text to the element. 
-XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, - XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) { +XMLClear *XMLNode::addClear_priv(int memoryIncrease, + XMLSTR lpszValue, + XMLCSTR lpszOpen, + XMLCSTR lpszClose, + int pos) { if (!lpszValue) return &emptyXMLClear; if (!d) { myFree(lpszValue); return &emptyXMLClear; } - d->pClear = (XMLClear *)addToOrder(memoryIncrease, &pos, d->nClear, d->pClear, - sizeof(XMLClear), eNodeClear); + d->pClear = (XMLClear *)addToOrder( + memoryIncrease, &pos, d->nClear, d->pClear, sizeof(XMLClear), eNodeClear); XMLClear *pNewClear = d->pClear + pos; pNewClear->lpszValue = lpszValue; if (!lpszOpen) @@ -1510,8 +1533,11 @@ char XMLNode::parseClearTag(void *px, void *_pClear) { pXML->nIndex += cbTemp + (int)xstrlen(pClear.lpszClose); // Add the clear node to the current element - addClear_priv(MEMORYINCREASE, stringDup(lpXML, cbTemp), pClear.lpszOpen, - pClear.lpszClose, -1); + addClear_priv(MEMORYINCREASE, + stringDup(lpXML, cbTemp), + pClear.lpszOpen, + pClear.lpszClose, + -1); return 0; } @@ -1522,8 +1548,8 @@ char XMLNode::parseClearTag(void *px, void *_pClear) { void XMLNode::exactMemory(XMLNodeData *d) { if (d->pOrder) - d->pOrder = (int *)realloc(d->pOrder, (d->nChild + d->nText + d->nClear) * - sizeof(int)); + d->pOrder = (int *)realloc( + d->pOrder, (d->nChild + d->nText + d->nClear) * sizeof(int)); if (d->pChild) d->pChild = (XMLNode *)realloc(d->pChild, d->nChild * sizeof(XMLNode)); if (d->pAttribute) @@ -1675,7 +1701,8 @@ int XMLNode::ParseXMLElement(void *pa) { // the current one and recurse pNew = addChild_priv(MEMORYINCREASE, stringDup(token.pStr, cbToken), - nDeclaration, -1); + nDeclaration, + -1); while (!pNew.isEmpty()) { // Callself to process the new node. 
If we return @@ -1724,7 +1751,8 @@ int XMLNode::ParseXMLElement(void *pa) { // Add the new element and recurse pNew = addChild_priv( MEMORYINCREASE, - stringDup(pXML->lpNewElement, pXML->cbNewElement), 0, + stringDup(pXML->lpNewElement, pXML->cbNewElement), + 0, -1); pXML->cbNewElement = 0; } else { @@ -1857,8 +1885,8 @@ int XMLNode::ParseXMLElement(void *pa) { // Eg. 'Attribute AnotherAttribute' case eTokenText: // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE, stringDup(lpszTemp, cbTemp), - NULL); + addAttribute_priv( + MEMORYINCREASE, stringDup(lpszTemp, cbTemp), NULL); // Cache the token then indicate. We are next to // look for the equals attribute lpszTemp = token.pStr; @@ -1882,8 +1910,8 @@ int XMLNode::ParseXMLElement(void *pa) { if (cbTemp) { // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE, - stringDup(lpszTemp, cbTemp), NULL); + addAttribute_priv( + MEMORYINCREASE, stringDup(lpszTemp, cbTemp), NULL); } // If this is the end of the tag then return to the caller @@ -1945,8 +1973,8 @@ int XMLNode::ParseXMLElement(void *pa) { if (!attrVal) return FALSE; } - addAttribute_priv(MEMORYINCREASE, - stringDup(lpszTemp, cbTemp), attrVal); + addAttribute_priv( + MEMORYINCREASE, stringDup(lpszTemp, cbTemp), attrVal); } // Indicate we are searching for a new attribute @@ -1987,8 +2015,8 @@ int XMLNode::ParseXMLElement(void *pa) { } // Count the number of lines and columns in an XML string. -static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, - XMLResults *pResults) { +static void +CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) { XMLCHAR ch; assert(lpXML); assert(pResults); @@ -2009,8 +2037,8 @@ static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, } // Parse XML and return the root element. 
-XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, - XMLResults *pResults) { +XMLNode +XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) { if (!lpszXML) { if (pResults) { pResults->error = eXMLErrorNoElements; @@ -2021,8 +2049,8 @@ XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, } XMLNode xnode(NULL, NULL, FALSE); - struct XML xml = {lpszXML, lpszXML, 0, 0, eXMLErrorNone, - NULL, 0, NULL, 0, TRUE}; + struct XML xml = { + lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE}; // Create header element xnode.ParseXMLElement(&xml); @@ -2067,8 +2095,8 @@ XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, return xnode; } -XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, - XMLResults *pResults) { +XMLNode +XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) { if (pResults) { pResults->nLine = 0; pResults->nColumn = 0; @@ -2152,7 +2180,8 @@ static inline void charmemset(XMLSTR dest, XMLCHAR c, int l) { // // This recurses through all subnodes then adds contents of the nodes to the // string. -int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, +int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, + XMLSTR lpszMarker, int nFormat) { int nResult = 0; int cb = nFormat < 0 ? 
0 : nFormat; @@ -2590,8 +2619,8 @@ XMLNode XMLNode::addChild(XMLNode childNode, int pos) { dc->pParent = d; // int nc=d->nChild; // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); - d->pChild = (XMLNode *)addToOrder(0, &pos, d->nChild, d->pChild, - sizeof(XMLNode), eNodeChild); + d->pChild = (XMLNode *)addToOrder( + 0, &pos, d->nChild, d->pChild, sizeof(XMLNode), eNodeChild); d->pChild[pos].d = dc; d->nChild++; return childNode; @@ -2624,8 +2653,8 @@ void XMLNode::deleteAttribute(XMLCSTR lpszName) { deleteAttribute(j - 1); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, - XMLSTR lpszNewName, int i) { +XMLAttribute * +XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, int i) { if (!d) { if (lpszNewValue) free(lpszNewValue); @@ -2860,8 +2889,8 @@ XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const { return getChildNode(i); } -XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, - XMLCHAR sep) { +XMLNode +XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) { XMLSTR path = stringDup(_path); XMLNode x = getChildNodeByPathNonConst(path, createMissing, sep); if (path) @@ -2869,7 +2898,8 @@ XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, return x; } -XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, +XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, + char createIfMissing, XMLCHAR sep) { if ((!path) || (!(*path))) return *this; @@ -2949,7 +2979,8 @@ XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const { return findPosition(d, j - 1, eNodeChild); } -XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, XMLCSTR attributeName, +XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, + XMLCSTR attributeName, XMLCSTR attributeValue, int *k) const { int i = 0, j; @@ -3127,11 +3158,13 @@ char XMLNode::isDeclaration() const { char XMLNode::isEmpty() const { return (d == NULL); 
} XMLNode XMLNode::emptyNode() { return XMLNode::emptyXMLNode; } -XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, +XMLNode XMLNode::addChild(XMLCSTR lpszName, + char isDeclaration, XMLElementPosition pos) { return addChild_priv(0, stringDup(lpszName), isDeclaration, pos); } -XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, +XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, + char isDeclaration, XMLElementPosition pos) { return addChild_priv(0, lpszName, isDeclaration, pos); } @@ -3147,12 +3180,16 @@ XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) { XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) { return addText_priv(0, lpszValue, pos); } -XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, - XMLCSTR lpszClose, XMLElementPosition pos) { +XMLClear *XMLNode::addClear(XMLCSTR lpszValue, + XMLCSTR lpszOpen, + XMLCSTR lpszClose, + XMLElementPosition pos) { return addClear_priv(0, stringDup(lpszValue), lpszOpen, lpszClose, pos); } -XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, - XMLCSTR lpszClose, XMLElementPosition pos) { +XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, + XMLCSTR lpszOpen, + XMLCSTR lpszClose, + XMLElementPosition pos) { return addClear_priv(0, lpszValue, lpszOpen, lpszClose, pos); } XMLCSTR XMLNode::updateName(XMLCSTR lpszName) { @@ -3164,16 +3201,16 @@ XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, stringDup(newAttribute->lpszName), oldAttribute->lpszName); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, - XMLCSTR lpszNewName, int i) { - return updateAttribute_WOSD(stringDup(lpszNewValue), stringDup(lpszNewName), - i); +XMLAttribute * +XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, int i) { + return updateAttribute_WOSD( + stringDup(lpszNewValue), stringDup(lpszNewName), i); } XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName) { - 
return updateAttribute_WOSD(stringDup(lpszNewValue), stringDup(lpszNewName), - lpszOldName); + return updateAttribute_WOSD( + stringDup(lpszNewValue), stringDup(lpszNewName), lpszOldName); } XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) { return updateText_WOSD(stringDup(lpszNewValue), i); @@ -3192,7 +3229,8 @@ XMLClear *XMLNode::updateClear(XMLClear *newP, XMLClear *oldP) { } char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, - char _guessWideCharChars, char _dropWhiteSpace, + char _guessWideCharChars, + char _dropWhiteSpace, char _removeCommentsInMiddleOfText) { guessWideCharChars = _guessWideCharChars; dropWhiteSpace = _dropWhiteSpace; @@ -3371,7 +3409,8 @@ int XMLParserBase64Tool::encodeLength(int inlen, char formatted) { return i; } -XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, +XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, + unsigned int inlen, char formatted) { int i = encodeLength(inlen, formatted), k = 17, eLen = inlen / 3, j; alloc(i * sizeof(XMLCHAR)); @@ -3447,8 +3486,10 @@ unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data, XMLError *xe) { return (unsigned int)((size * 3) / 4); } -unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, - int len, XMLError *xe) { +unsigned char XMLParserBase64Tool::decode(XMLCSTR data, + unsigned char *buf, + int len, + XMLError *xe) { if (xe) *xe = eXMLErrorNone; int i = 0, p = 0; @@ -3561,8 +3602,8 @@ void XMLParserBase64Tool::alloc(int newsize) { } } -unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, - XMLError *xe) { +unsigned char * +XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) { if (xe) *xe = eXMLErrorNone; unsigned int len = decodeSize(data, xe); diff --git a/src/xmlParser.h b/src/xmlParser.h index fe875fa..40a9daa 100644 --- a/src/xmlParser.h +++ b/src/xmlParser.h @@ -291,7 +291,8 @@ typedef struct XMLDLLENTRY XMLNode { /// Parse an XML string and return the root of a 
XMLNode tree representing the /// string. - static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL, + static XMLNode parseString(XMLCSTR lpXMLString, + XMLCSTR tag = NULL, XMLResults *pResults = NULL); /**< The "parseString" function parse an XML string and return the root of a * XMLNode tree. The "opposite" of this function is the function @@ -314,8 +315,8 @@ typedef struct XMLDLLENTRY XMLNode { /// Parse an XML file and return the root of a XMLNode tree representing the /// file. - static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL, - XMLResults *pResults = NULL); + static XMLNode + parseFile(XMLCSTR filename, XMLCSTR tag = NULL, XMLResults *pResults = NULL); /**< The "parseFile" function parse an XML file and return the root of a * XMLNode tree. The "opposite" of this function is the function "writeToFile" * that re-creates an XML file from an XMLNode tree. If the XML document is @@ -376,7 +377,8 @@ typedef struct XMLDLLENTRY XMLNode { * returns the size in character of the string. 
*/ /// Save the content of an xmlNode inside a file - XMLError writeToFile(XMLCSTR filename, const char *encoding = NULL, + XMLError writeToFile(XMLCSTR filename, + const char *encoding = NULL, char nFormat = 1) const; /**< If nFormat==0, no formatting is required otherwise this returns an user * friendly XML string from a given element with appropriate white spaces and @@ -406,13 +408,17 @@ typedef struct XMLDLLENTRY XMLNode { int *i = NULL) const; ///< return next child node with specific ///< name (return an empty node if failing) XMLNode getChildNodeWithAttribute( - XMLCSTR tagName, XMLCSTR attributeName, XMLCSTR attributeValue = NULL, + XMLCSTR tagName, + XMLCSTR attributeName, + XMLCSTR attributeValue = NULL, int *i = NULL) const; ///< return child node with specific name/attribute ///< (return an empty node if failing) - XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing = 0, + XMLNode getChildNodeByPath(XMLCSTR path, + char createNodeIfMissing = 0, XMLCHAR sep = '/'); ///< return the first child node with specific path - XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing = 0, + XMLNode getChildNodeByPathNonConst(XMLSTR path, + char createNodeIfMissing = 0, XMLCHAR sep = '/'); ///< return the first child node with specific path. 
@@ -474,7 +480,8 @@ typedef struct XMLDLLENTRY XMLNode { createXMLTopNode(XMLCSTR lpszName, char isDeclaration = FALSE); ///< Create the top node of an XMLNode structure - XMLNode addChild(XMLCSTR lpszName, char isDeclaration = FALSE, + XMLNode addChild(XMLCSTR lpszName, + char isDeclaration = FALSE, XMLElementPosition pos = -1); ///< Add a new child node XMLNode addChild(XMLNode nodeToAdd, XMLElementPosition pos = @@ -485,8 +492,10 @@ typedef struct XMLDLLENTRY XMLNode { XMLCSTR lpszValuev); ///< Add a new attribute XMLCSTR addText(XMLCSTR lpszValue, XMLElementPosition pos = -1); ///< Add a new text content - XMLClear *addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen = NULL, - XMLCSTR lpszClose = NULL, XMLElementPosition pos = -1); + XMLClear *addClear(XMLCSTR lpszValue, + XMLCSTR lpszOpen = NULL, + XMLCSTR lpszClose = NULL, + XMLElementPosition pos = -1); /**< Add a new clear tag * @param lpszOpen default value "" @@ -504,11 +513,13 @@ typedef struct XMLDLLENTRY XMLNode { XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a ///< new one will be added XMLAttribute * - updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName = NULL, + updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName = NULL, int i = 0); ///< if the attribute to update is missing, a new ///< one will be added XMLAttribute *updateAttribute( - XMLCSTR lpszNewValue, XMLCSTR lpszNewName, + XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change ///< the name of the attribute if the attribute to @@ -605,13 +616,15 @@ typedef struct XMLDLLENTRY XMLNode { XMLSTR lpszName, char isDeclaration = FALSE); ///< Create the top node of an XMLNode structure - XMLNode addChild_WOSD(XMLSTR lpszName, char isDeclaration = FALSE, + XMLNode addChild_WOSD(XMLSTR lpszName, + char isDeclaration = FALSE, XMLElementPosition pos = -1); ///< Add a new child node XMLAttribute *addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValue); ///< 
Add a new attribute XMLCSTR addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos = -1); ///< Add a new text content - XMLClear *addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen = NULL, + XMLClear *addClear_WOSD(XMLSTR lpszValue, + XMLCSTR lpszOpen = NULL, XMLCSTR lpszClose = NULL, XMLElementPosition pos = -1); ///< Add a new clear Tag @@ -621,11 +634,13 @@ typedef struct XMLDLLENTRY XMLNode { XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a ///< new one will be added XMLAttribute * - updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName = NULL, + updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName = NULL, int i = 0); ///< if the attribute to update is missing, a ///< new one will be added XMLAttribute *updateAttribute_WOSD( - XMLSTR lpszNewValue, XMLSTR lpszNewName, + XMLSTR lpszNewValue, + XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change ///< the name of the attribute if the attribute to @@ -687,7 +702,8 @@ typedef struct XMLDLLENTRY XMLNode { /// Sets the global options for the conversions static char setGlobalOptions( XMLCharEncoding characterEncoding = XMLNode::char_encoding_UTF8, - char guessWideCharChars = 1, char dropWhiteSpace = 1, + char guessWideCharChars = 1, + char dropWhiteSpace = 1, char removeCommentsInMiddleOfText = 1); /**< The "setGlobalOptions" function allows you to change four global * parameters that affect string & file parsing. First of all, you @@ -740,8 +756,8 @@ typedef struct XMLDLLENTRY XMLNode { * WideChar) may fail (rarely). */ /// Guess the character encoding of the string (ascii, utf8 or shift-JIS) - static XMLCharEncoding guessCharEncoding(void *buffer, int bufLen, - char useXMLEncodingAttribute = 1); + static XMLCharEncoding + guessCharEncoding(void *buffer, int bufLen, char useXMLEncodingAttribute = 1); /**< The "guessCharEncoding" function try to guess the character encoding. You * most-probably will never have to use this function. 
It then returns the * appropriate value of the global parameter "characterEncoding" described in @@ -785,8 +801,8 @@ typedef struct XMLDLLENTRY XMLNode { char parseClearTag(void *px, void *pa); char maybeAddTxT(void *pa, XMLCSTR tokenPStr); int ParseXMLElement(void *pXML); - void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, - XMLElementType xtype); + void *addToOrder( + int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); int indexText(XMLCSTR lpszValue) const; int indexClear(XMLCSTR lpszValue) const; XMLNode addChild_priv(int, XMLSTR, char, int); @@ -794,10 +810,10 @@ typedef struct XMLDLLENTRY XMLNode { XMLCSTR addText_priv(int, XMLSTR, int); XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int); void emptyTheNode(char force); - static inline XMLElementPosition findPosition(XMLNodeData *d, int index, - XMLElementType xtype); - static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, - int nFormat); + static inline XMLElementPosition + findPosition(XMLNodeData *d, int index, XMLElementType xtype); + static int + CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); static void exactMemory(XMLNodeData *d); static int detachFromParent(XMLNodeData *d); @@ -930,7 +946,8 @@ typedef struct XMLDLLENTRY XMLParserBase64Tool { * free'd when the XMLParserBase64Tool object is deleted. * All returned strings are sharing the same memory space. */ XMLSTR encode( - unsigned char *inByteBuf, unsigned int inByteLen, + unsigned char *inByteBuf, + unsigned int inByteLen, char formatted = 0); ///< returns a pointer to an internal buffer containing the base64 ///< string containing the binary data encoded from "inByteBuf" @@ -945,7 +962,8 @@ typedef struct XMLDLLENTRY XMLParserBase64Tool { * same memory space. 
* @param inString If "instring" is malformed, NULL will be returned */ unsigned char * - decode(XMLCSTR inString, int *outByteLen = NULL, + decode(XMLCSTR inString, + int *outByteLen = NULL, XMLError *xe = NULL); ///< returns a pointer to an internal buffer containing the ///< binary data decoded from "inString" @@ -955,7 +973,8 @@ typedef struct XMLDLLENTRY XMLParserBase64Tool { * (in byte) of "outByteBuf" in "inMaxByteOutBuflen". If "outByteBuf" is not * large enough or if data is malformed, then "FALSE" will be returned; * otherwise "TRUE". */ - static unsigned char decode(XMLCSTR inString, unsigned char *outByteBuf, + static unsigned char decode(XMLCSTR inString, + unsigned char *outByteBuf, int inMaxByteOutBuflen, XMLError *xe = NULL); ///< deprecated. diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index 2fe2780..1a4b5a2 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -107,8 +107,11 @@ def run_test(vector): stde = os.path.join(output_path, vector + ".err") with open(stdo, "w") as so, open(stde, "w") as se: p = subprocess.Popen([ - "../build/mcpat", "-infile", infile, "-print_level", "5", - "-opt_for_clk", "1" + "../build/mcpat", + "-i", + infile, + "-p", + "5", ], stdout=so, stderr=se) diff --git a/util/format.sh b/util/format.sh index 4fffc52..405f145 100755 --- a/util/format.sh +++ b/util/format.sh @@ -2,7 +2,7 @@ SCRIPT="$(readlink -f $0)" SCRIPT_PATH="$(dirname $SCRIPT)" -SRC_PATH="$SCRIPT_PATH/.." 
+SRC_PATH="$SCRIPT_PATH/../src" # Format C Code: find $SRC_PATH -name '*.cpp' \ From a2606a7662489e30e524b732520a682c43996b56 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 21:30:16 -0500 Subject: [PATCH 09/59] refactor-serialization: Refactored FlashController Refactored the Flash Controller Module to separate the Area calculations from the Power Calculations --- src/CMakeLists.txt | 16 +- src/iocontrollers.cc | 2 + src/iocontrollers.h | 2 + src/iocontrollers/CMakeLists.txt | 6 + src/iocontrollers/flashcontroller.cc | 301 +++++++++++++++++++++++++++ src/iocontrollers/flashcontroller.h | 71 +++++++ src/processor.cc | 26 ++- src/processor.h | 3 +- 8 files changed, 409 insertions(+), 18 deletions(-) create mode 100644 src/iocontrollers/CMakeLists.txt create mode 100644 src/iocontrollers/flashcontroller.cc create mode 100644 src/iocontrollers/flashcontroller.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 69474b4..4ca25c1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(cacti) +add_subdirectory(iocontrollers) -add_executable(mcpat +add_library(top arch_const.h array.h array.cc @@ -17,7 +18,6 @@ add_executable(mcpat logic.cc noc.h noc.cc - main.cc memoryctrl.h memoryctrl.cc options.h @@ -32,8 +32,18 @@ add_executable(mcpat XML_Parse.h XML_Parse.cc ) +target_include_directories(top PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(top LINK_PUBLIC cacti iocontrollers) -target_link_libraries(mcpat LINK_PUBLIC cacti Threads::Threads Boost::program_options) +add_executable(mcpat main.cc) + +target_link_libraries(mcpat + LINK_PUBLIC + cacti + top + iocontrollers + Threads::Threads + Boost::program_options) add_custom_command(TARGET mcpat POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ ${PROJECT_BINARY_DIR}/mcpat diff --git a/src/iocontrollers.cc b/src/iocontrollers.cc index 1a5d8b2..2e349b5 100644 --- a/src/iocontrollers.cc +++ b/src/iocontrollers.cc @@ -437,6 +437,7 @@ void 
PCIeController::set_pcie_param() { // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); } +#if 0 FlashController::FlashController(ParseXML *XML_interface, InputParameter *interface_ip_) : XML(XML_interface), interface_ip(*interface_ip_) { @@ -578,3 +579,4 @@ void FlashController::set_fc_param() { interface_ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; } } +#endif diff --git a/src/iocontrollers.h b/src/iocontrollers.h index 1c99d4f..77cf357 100644 --- a/src/iocontrollers.h +++ b/src/iocontrollers.h @@ -70,6 +70,7 @@ class PCIeController : public Component { ~PCIeController(){}; }; +#if 0 class FlashController : public Component { public: ParseXML *XML; @@ -83,3 +84,4 @@ class FlashController : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~FlashController(){}; }; +#endif diff --git a/src/iocontrollers/CMakeLists.txt b/src/iocontrollers/CMakeLists.txt new file mode 100644 index 0000000..76986a1 --- /dev/null +++ b/src/iocontrollers/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(iocontrollers + flashcontroller.h + flashcontroller.cc +) +target_include_directories(iocontrollers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(iocontrollers LINK_PUBLIC cacti top) diff --git a/src/iocontrollers/flashcontroller.cc b/src/iocontrollers/flashcontroller.cc new file mode 100644 index 0000000..aa64420 --- /dev/null +++ b/src/iocontrollers/flashcontroller.cc @@ -0,0 +1,301 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * Author: + * Andrew Smith + ***************************************************************************/ + +#include "flashcontroller.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* + * FlashController() + * Constructor, Initializes the member variables that are shared across + * 
methods. + */ +FlashController::FlashController() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; + + number_channel = 0.0; + // based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support + // 8x lanes with each lane speed up to 250MB/s (PCIe1.1x) This is already + // saturate the 200MB/s of the flash controller core above. + ctrl_gates = 129267; + SerDer_gates = 200000 / 8; + + NMOS_sizing = 0.0; + PMOS_sizing = 0.0; +} + +/* + * computeArea() + * Computes the component area based off of the input parameters in the XML. + * Side Effects: + * Sets the component area member to the calculated area. + * Input: + * None + * Output: + * None + */ +void FlashController::computeArea() { + double ctrl_area = 0.0; + double SerDer_area = 0.0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + if (!init_params) { + std::cerr << "[ FlashController ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ + local_result = init_interface(&ip); + if (fcp.type == 0) // high performance NIU + { + std::cerr + << "Current McPAT does not support high performance flash contorller " + "since even low power designs are enough for maintain throughput" + << endl; + exit(1); + } + ctrl_area = 0.243 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from + // CAST + SerDer_area = 0.36 / 8 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + number_channel = 1 + (fcp.num_channels - 1) * 0.2; + area.set_area((ctrl_area + (fcp.withPHY ? SerDer_area : 0)) * 1e6 * + number_channel); +} + +/* + * computeStaticPower() + * Computes the static power based off of the input parameters from the xml. 
+ * It calculates leakage power, + * + * TODO: Add Vdd such that the static power & dynamic power can reflect + * changes in the chip power supply. + * + * Side Effects: + * Sets the static power, leakage, and power gated leakage + * Input: + * None + * Output: + * None + */ +void FlashController::computeStaticPower() { + double ctrl_dyn = 0.0; + double SerDer_dyn = 0.0; + + if (!init_params) { + std::cerr << "[ FlashController ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + if (fcp.type == 0) // high performance NIU + { + std::cerr + << "Current McPAT does not support high performance flash contorller " + "since even low power designs are enough for maintain throughput" + << endl; + exit(1); + } + + // Power + // Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s + // This is power not energy! + ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 * + (ip.F_sz_nm / 65.0); + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 1.6 * (ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 * + g_tp.peri_global.Vdd / 1.2; + // max Per controller speed is 1.6Gb/s (200MB/s) + + power_t.readOp.dynamic = + (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) * number_channel; + power_t.readOp.leakage = + ((ctrl_gates + (fcp.withPHY ? 
SerDer_gates : 0)) * number_channel) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + double pg_reduction = power_gating_leakage_reduction( + false); // array structure all retain state; + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; + power_t.readOp.gate_leakage = + ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W +} + +/* + * computeDynamicPower() + * Compute both Peak Power and Runtime Power based on the stats of the input + * xml. + * Side Effects: + * Sets the runtime power, and the peak dynamic power in the component + * class + * Input: + * None + * Output: + * None + */ +void FlashController::computeDynamicPower() { + if (!init_stats) { + std::cerr << "[ FlashController ] Error: must set stats before calling " + "computeDynamicPower()\n"; + exit(1); + } + // Peak Dynamic Power based on Duty Cycle + power = power_t; + power.readOp.dynamic *= fcp.duty_cycle; + // Runtime Dynamic Power based on % Load + rt_power = power_t; + rt_power.readOp.dynamic *= fcp.perc_load; +} + +/* + * display(uint32_t, bool) + * Display the Power, Area, and Timing results to the standard output + * Side Effects: + * None + * Input: + * indent - How far in to indent + * enable - toggle printing + * Output: + * None + */ +void FlashController::display(uint32_t indent, bool enable) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + if (enable) { + std::cout << "Flash Controller:" << std::endl; + std::cout << indent_str << "Area = " << area.get_area() * 1e-6 << " 
mm^2" + << std::endl; + std::cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" + << std::endl; // no multiply of clock since this is power already + std::cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; + if (power_gating) + std::cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel + ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic + << " W" << std::endl; + std::cout << std::endl; + } else { + } +} + +/* + * set_params(const ParseXML, InputParameter) + * Sets the parts of the flash controller params that contribute to area and + * static power. Must be called before computing area or static power. + * Side Effects: + * sets the interface_ip struct, and sets the params struct to the + * "params" from the xml file. Also sets init_params to true. 
+ * Input: + * *XML - Parsed XML + * *interface_ip - Interface from McPAT used in Cacti Library + * Output: + * None + */ +void FlashController::set_params(const ParseXML *XML, + InputParameter *interface_ip) { + ip = *interface_ip; + // fcp.clockRate = XML->sys.flashc.mc_clock; + fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; + fcp.num_channels = ceil(fcp.peakDataTransferRate / 200); + fcp.num_mcs = XML->sys.flashc.number_mcs; + fcp.type = XML->sys.flashc.type; + fcp.withPHY = XML->sys.flashc.withPHY; + + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + + if (XML->sys.flashc.vdd > 0) { + ip.specific_hp_vdd = true; + ip.specific_lop_vdd = true; + ip.specific_lstp_vdd = true; + ip.hp_Vdd = XML->sys.flashc.vdd; + ip.lop_Vdd = XML->sys.flashc.vdd; + ip.lstp_Vdd = XML->sys.flashc.vdd; + } + if (XML->sys.flashc.power_gating_vcc > -1) { + ip.specific_vcc_min = true; + ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; + } + init_params = true; +} + +/* + * set_stats(const ParseXML) + * Sets the parts of the flash controller params that contribute to dynamic + * power. + * Side Effects: + * Store duty cycle and and percentage load into fc params, sets + * init_stats to true + * Input: + * *XML - Parsed XML + * Output: + * None + */ +void FlashController::set_stats(const ParseXML *XML) { + fcp.duty_cycle = XML->sys.flashc.duty_cycle; + fcp.perc_load = XML->sys.flashc.total_load_perc; + init_stats = true; +} diff --git a/src/iocontrollers/flashcontroller.h b/src/iocontrollers/flashcontroller.h new file mode 100644 index 0000000..35af1fc --- /dev/null +++ b/src/iocontrollers/flashcontroller.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * Author: + * Andrew Smith + ***************************************************************************/ +#ifndef __FLASHCONTROLLER_H__ +#define __FLASHCONTROLLER_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "parameter.h" + +#include + +class FlashController : public Component { +public: + InputParameter ip; + MCParam fcp; + powerDef power_t; + uca_org_t local_result; + + FlashController(); + void 
set_params(const ParseXML *XML, InputParameter *ip); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); + +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; + + double number_channel; + double ctrl_gates; + double SerDer_gates; + double NMOS_sizing; + double PMOS_sizing; +}; + +#endif // __FLASHCONTROLLER_H__ diff --git a/src/processor.cc b/src/processor.cc index d72221b..f55fedc 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -47,7 +47,7 @@ Processor::Processor(ParseXML *XML_interface) : XML(XML_interface), // TODO: using one global copy may have problems. - mc(nullptr), niu(nullptr), pcie(nullptr), flashcontroller(nullptr) { + mc(nullptr), niu(nullptr), pcie(nullptr) { /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory @@ -340,19 +340,21 @@ Processor::Processor(ParseXML *XML_interface) if (XML->sys.flashc.number_mcs > 0) // flash controller { - flashcontroller = new FlashController(XML, &interface_ip); - flashcontroller->computeEnergy(); - flashcontroller->computeEnergy(false); - double number_fcs = flashcontroller->fcp.num_mcs; + flashcontroller.set_params(XML, &interface_ip); + flashcontroller.set_stats(XML); + flashcontroller.computeArea(); + flashcontroller.computeStaticPower(); + flashcontroller.computeDynamicPower(); + double number_fcs = flashcontroller.fcp.num_mcs; flashcontrollers.area.set_area(flashcontrollers.area.get_area() + - flashcontroller->area.get_area() * + flashcontroller.area.get_area() * number_fcs); area.set_area(area.get_area() + flashcontrollers.area.get_area()); set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); - flashcontrollers.power = flashcontroller->power * pppm_t; + flashcontrollers.power = flashcontroller.power * pppm_t; power = power + 
flashcontrollers.power; set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); - flashcontrollers.rt_power = flashcontroller->rt_power * pppm_t; + flashcontrollers.rt_power = flashcontroller.rt_power * pppm_t; rt_power = rt_power + flashcontrollers.rt_power; } @@ -799,7 +801,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (XML->sys.flashc.number_mcs > 0) { cout << indent_str - << "Total Flash/SSD Controllers: " << flashcontroller->fcp.num_mcs + << "Total Flash/SSD Controllers: " << flashcontroller.fcp.num_mcs << " Flash/SSD Controllers " << endl; displayDeviceType(XML->sys.device_type, indent); cout << indent_str_next @@ -925,7 +927,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (XML->sys.flashc.number_mcs > 0 && XML->sys.flashc.memory_channels_per_mc > 0) { - flashcontroller->displayEnergy(indent + 4, is_tdp); + flashcontroller.display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1116,8 +1118,4 @@ Processor::~Processor() { delete pcie; pcie = nullptr; } - if (flashcontroller) { - delete flashcontroller; - flashcontroller = nullptr; - } }; diff --git a/src/processor.h b/src/processor.h index d0d5a6c..77b23d1 100644 --- a/src/processor.h +++ b/src/processor.h @@ -38,6 +38,7 @@ #include "basic_components.h" #include "core.h" #include "decoder.h" +#include "flashcontroller.h" #include "iocontrollers.h" #include "memoryctrl.h" #include "noc.h" @@ -59,7 +60,7 @@ class Processor : public Component { MemoryController *mc; NIUController *niu; PCIeController *pcie; - FlashController *flashcontroller; + FlashController flashcontroller; InputParameter interface_ip; ProcParam procdynp; // wire globalInterconnect; From c5ee7ff8f8858912e4a2825f73f057f4e032de6e Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 22:38:26 -0500 Subject: [PATCH 10/59] refactor-serialization: Refactored PCIeController 
Refactored the PCIe Controller Module to separate the Area calculations from the Power Calculations --- src/iocontrollers.cc | 2 + src/iocontrollers.h | 2 + src/iocontrollers/CMakeLists.txt | 6 +- ...flashcontroller.cc => flash_controller.cc} | 2 +- .../{flashcontroller.h => flash_controller.h} | 6 +- src/iocontrollers/pcie_controller.cc | 326 ++++++++++++++++++ src/iocontrollers/pcie_controller.h | 65 ++++ src/processor.cc | 31 +- src/processor.h | 5 +- 9 files changed, 421 insertions(+), 24 deletions(-) rename src/iocontrollers/{flashcontroller.cc => flash_controller.cc} (99%) rename src/iocontrollers/{flashcontroller.h => flash_controller.h} (95%) create mode 100644 src/iocontrollers/pcie_controller.cc create mode 100644 src/iocontrollers/pcie_controller.h diff --git a/src/iocontrollers.cc b/src/iocontrollers.cc index 2e349b5..a6a86c5 100644 --- a/src/iocontrollers.cc +++ b/src/iocontrollers.cc @@ -260,6 +260,7 @@ void NIUController::set_niu_param() { // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); } +#if 0 PCIeController::PCIeController(ParseXML *XML_interface, InputParameter *interface_ip_) : XML(XML_interface), interface_ip(*interface_ip_) { @@ -436,6 +437,7 @@ void PCIeController::set_pcie_param() { // pciep.executionTime = // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); } +#endif #if 0 FlashController::FlashController(ParseXML *XML_interface, diff --git a/src/iocontrollers.h b/src/iocontrollers.h index 77cf357..3f7d072 100644 --- a/src/iocontrollers.h +++ b/src/iocontrollers.h @@ -56,6 +56,7 @@ class NIUController : public Component { ~NIUController(){}; }; +#if 0 class PCIeController : public Component { public: ParseXML *XML; @@ -69,6 +70,7 @@ class PCIeController : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~PCIeController(){}; }; +#endif #if 0 class FlashController : public Component { diff --git a/src/iocontrollers/CMakeLists.txt b/src/iocontrollers/CMakeLists.txt 
index 76986a1..2ced58e 100644 --- a/src/iocontrollers/CMakeLists.txt +++ b/src/iocontrollers/CMakeLists.txt @@ -1,6 +1,8 @@ add_library(iocontrollers - flashcontroller.h - flashcontroller.cc + flash_controller.h + flash_controller.cc + pcie_controller.h + pcie_controller.cc ) target_include_directories(iocontrollers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(iocontrollers LINK_PUBLIC cacti top) diff --git a/src/iocontrollers/flashcontroller.cc b/src/iocontrollers/flash_controller.cc similarity index 99% rename from src/iocontrollers/flashcontroller.cc rename to src/iocontrollers/flash_controller.cc index aa64420..1d0cfc8 100644 --- a/src/iocontrollers/flashcontroller.cc +++ b/src/iocontrollers/flash_controller.cc @@ -31,7 +31,7 @@ * Andrew Smith ***************************************************************************/ -#include "flashcontroller.h" +#include "flash_controller.h" #include "XML_Parse.h" #include "basic_circuit.h" diff --git a/src/iocontrollers/flashcontroller.h b/src/iocontrollers/flash_controller.h similarity index 95% rename from src/iocontrollers/flashcontroller.h rename to src/iocontrollers/flash_controller.h index 35af1fc..cfb4138 100644 --- a/src/iocontrollers/flashcontroller.h +++ b/src/iocontrollers/flash_controller.h @@ -30,8 +30,8 @@ * Author: * Andrew Smith ***************************************************************************/ -#ifndef __FLASHCONTROLLER_H__ -#define __FLASHCONTROLLER_H__ +#ifndef __FLASH_CONTROLLER_H__ +#define __FLASH_CONTROLLER_H__ #include "XML_Parse.h" #include "array.h" @@ -48,7 +48,7 @@ class FlashController : public Component { uca_org_t local_result; FlashController(); - void set_params(const ParseXML *XML, InputParameter *ip); + void set_params(const ParseXML *XML, InputParameter *interface_ip); void set_stats(const ParseXML *XML); void computeArea(); void computeStaticPower(); diff --git a/src/iocontrollers/pcie_controller.cc b/src/iocontrollers/pcie_controller.cc new file mode 100644 index 
0000000..5953280 --- /dev/null +++ b/src/iocontrollers/pcie_controller.cc @@ -0,0 +1,326 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * Author: + * Andrew Smith + ***************************************************************************/ +#include "pcie_controller.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* + * PCIeController() + * Constructor, Initializes the member variables that are shared across + * methods. + */ +PCIeController::PCIeController() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; +} + +/* + * computeArea() + * Computes the component area based off of the input parameters in the XML. + * Side Effects: + * Sets the component area member to the calculated area. 
+ * Input: + * None + * Output: + * None + */ +void PCIeController::computeArea() { + double frontend_area = 0.0; + double phy_area = 0.0; + double ctrl_area = 0.0; + double SerDer_area = 0.0; + + if (!init_params) { + std::cerr << "[ PCIeController ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ + local_result = init_interface(&ip); + if (pciep.type == 0) // high performance NIU + { + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate @ 65nm. + ctrl_area = (5.2 + 0.5) / 2 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, and Cadence + // ChipEstimate @ 65nm. + frontend_area = + (5.2 + 0.1) / 2 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate hard IP @65nm. SerDer is very hard to scale + SerDer_area = (3.03 + 0.36) * (ip.F_sz_um / 0.065); //* (ip.F_sz_um/0.065); + phy_area = frontend_area + SerDer_area; + // total area + } else { + ctrl_area = 0.412 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, and Cadence + // ChipEstimate @ 65nm. + SerDer_area = 0.36 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // total area + } + area.set_area(((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 * + pciep.num_channels) * + 1e6); +} + +/* + * computeStaticPower() + * Computes the static power based off of the input parameters from the xml. + * It calculates leakage power, + * + * TODO: Add Vdd such that the static power & dynamic power can reflect + * changes in the chip power supply. 
+ * + * Side Effects: + * Sets the static power, leakage, and power gated leakage + * Input: + * None + * Output: + * None + */ +void PCIeController::computeStaticPower() { + double frontend_dyn = 0.0; + double ctrl_dyn = 0.0; + double SerDer_dyn = 0.0; + double frontend_gates = 0.0; + double ctrl_gates = 0.0; + double SerDer_gates = 0.0; + double NMOS_sizing = 0.0; + double PMOS_sizing = 0.0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + + if (!init_params) { + std::cerr << "[ PCIeController ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + if (pciep.type == 0) // high performance NIU + { + // Power + // Cadence ChipEstimate using 65nm the controller includes everything: the + // PHY, the data link and transaction layer + ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = + // 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(ip.F_sz_nm/65.0); + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 4 * (ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 * + g_tp.peri_global.Vdd / + 1.2; // PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle + + // power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; + // Cadence ChipEstimate using 65nm + ctrl_gates = 900000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + // SerDer_gates = 200000/8; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + // Power + // Cadence ChipEstimate using 65nm the controller includes everything: the + // PHY, the data link and transaction layer + ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = + // 
0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(ip.F_sz_nm/65.0); + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 4 * (ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / 1.2 * + g_tp.peri_global.Vdd / + 1.2; // PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle + + // Cadence ChipEstimate using 65nm + ctrl_gates = 200000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + SerDer_gates = 200000 / 8 * pciep.num_channels; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } + power_t.readOp.dynamic = + (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * pciep.num_channels; + power_t.readOp.leakage = + (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; + power_t.readOp.gate_leakage = + (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W +} + +/* + * computeDynamicPower() + * Compute both Peak Power and Runtime Power based on the stats of the input + * xml. 
+ * Side Effects: + * Sets the runtime power, and the peak dynamic power in the component + * class + * Input: + * None + * Output: + * None + */ +void PCIeController::computeDynamicPower() { + power = power_t; + power.readOp.dynamic *= pciep.duty_cycle; + rt_power = power_t; + rt_power.readOp.dynamic *= pciep.perc_load; +} + +/* + * display(uint32_t, bool) + * Display the Power, Area, and Timing results to the standard output + * Side Effects: + * None + * Input: + * indent - How far in to indent + * enable - toggle printing + * Output: + * None + */ +void PCIeController::display(uint32_t indent, bool enable) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + + if (enable) { + cout << "PCIe:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * pciep.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic * pciep.clockRate + << " W" << endl; + cout << endl; + } else { + } +} + +/* + * set_params(const ParseXML, InputParameter) + * Sets the parts of the PCIe controller params that contribute to area and + * static power. Must be called before computing area or static power. + * Side Effects: + * sets the interface_ip struct, and sets the params struct to the + * "params" from the xml file. Also sets init_params to true.
+ * Input: + * *XML - Parsed XML + * *interface_ip - Interface from McPAT used in Cacti Library + * Output: + * None + */ +void PCIeController::set_params(const ParseXML *XML, + InputParameter *interface_ip) { + ip = *interface_ip; + pciep.clockRate = XML->sys.pcie.clockrate; + pciep.clockRate *= 1e6; + pciep.num_units = XML->sys.pcie.number_units; + pciep.num_channels = XML->sys.pcie.num_channels; + pciep.type = XML->sys.pcie.type; + pciep.withPHY = XML->sys.pcie.withPHY; + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + + if (XML->sys.pcie.vdd > 0) { + ip.specific_hp_vdd = true; + ip.specific_lop_vdd = true; + ip.specific_lstp_vdd = true; + ip.hp_Vdd = XML->sys.pcie.vdd; + ip.lop_Vdd = XML->sys.pcie.vdd; + ip.lstp_Vdd = XML->sys.pcie.vdd; + } + if (XML->sys.pcie.power_gating_vcc > -1) { + ip.specific_vcc_min = true; + ip.user_defined_vcc_min = XML->sys.pcie.power_gating_vcc; + } + init_params = true; +} + +/* + * set_stats(const ParseXML) + * Sets the parts of the PCIe controller params that contribute to dynamic + * power. + * Side Effects: + * Store duty cycle and percentage load into pciep params, sets + * init_stats to true + * Input: + * *XML - Parsed XML + * Output: + * None + */ +void PCIeController::set_stats(const ParseXML *XML) { + pciep.duty_cycle = XML->sys.pcie.duty_cycle; + pciep.perc_load = XML->sys.pcie.total_load_perc; + init_stats = true; +} diff --git a/src/iocontrollers/pcie_controller.h b/src/iocontrollers/pcie_controller.h new file mode 100644 index 0000000..eacda70 --- /dev/null +++ b/src/iocontrollers/pcie_controller.h @@ -0,0 +1,65 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * Author: + * Andrew Smith + ***************************************************************************/ +#ifndef __PCIE_CONTROLLER_H__ +#define __PCIE_CONTROLLER_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "parameter.h" + +#include + +class PCIeController : public Component { +public: + InputParameter ip; + PCIeParam pciep; + powerDef power_t; + uca_org_t local_result; + + PCIeController(); + void 
set_params(const ParseXML *XML, InputParameter *interface_ip); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); + +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; +}; + +#endif // __PCIE_CONTROLLER_H__ diff --git a/src/processor.cc b/src/processor.cc index f55fedc..33e6b0c 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -47,7 +47,7 @@ Processor::Processor(ParseXML *XML_interface) : XML(XML_interface), // TODO: using one global copy may have problems. - mc(nullptr), niu(nullptr), pcie(nullptr) { + mc(nullptr), niu(nullptr) { /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory @@ -383,26 +383,29 @@ Processor::Processor(ParseXML *XML_interface) } if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - pcie = new PCIeController(XML, &interface_ip); - pcie->computeEnergy(); - pcie->computeEnergy(false); + pcie.set_params(XML, &interface_ip); + pcie.computeArea(); pcies.area.set_area(pcies.area.get_area() + - pcie->area.get_area() * XML->sys.pcie.number_units); + pcie.area.get_area() * XML->sys.pcie.number_units); area.set_area(area.get_area() + - pcie->area.get_area() * XML->sys.pcie.number_units); + pcie.area.get_area() * XML->sys.pcie.number_units); set_pppm(pppm_t, - XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units * pcie.pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units); - pcies.power = pcie->power * pppm_t; + + pcie.set_stats(XML); + pcie.computeStaticPower(); + pcie.computeDynamicPower(); + pcies.power = pcie.power * pppm_t; power = power + pcies.power; set_pppm(pppm_t, - XML->sys.pcie.number_units * pcie->pciep.clockRate, + XML->sys.pcie.number_units * pcie.pciep.clockRate, 
XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units); - pcies.rt_power = pcie->rt_power * pppm_t; + pcies.rt_power = pcie.rt_power * pppm_t; rt_power = rt_power + pcies.rt_power; } @@ -857,7 +860,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; } if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - cout << indent_str << "Total PCIes: " << pcie->pciep.num_units + cout << indent_str << "Total PCIes: " << pcie.pciep.num_units << " PCIe Controllers " << endl; displayDeviceType(XML->sys.device_type, indent); cout << indent_str_next << "Area = " << pcies.area.get_area() * 1e-6 @@ -939,7 +942,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << endl; } if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - pcie->displayEnergy(indent + 4, is_tdp); + pcie.display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1114,8 +1117,4 @@ Processor::~Processor() { delete niu; niu = nullptr; } - if (pcie) { - delete pcie; - pcie = nullptr; - } }; diff --git a/src/processor.h b/src/processor.h index 77b23d1..183dba1 100644 --- a/src/processor.h +++ b/src/processor.h @@ -38,11 +38,12 @@ #include "basic_components.h" #include "core.h" #include "decoder.h" -#include "flashcontroller.h" +#include "flash_controller.h" #include "iocontrollers.h" #include "memoryctrl.h" #include "noc.h" #include "parameter.h" +#include "pcie_controller.h" #include "router.h" #include "sharedcache.h" @@ -59,7 +60,7 @@ class Processor : public Component { vector nocs; MemoryController *mc; NIUController *niu; - PCIeController *pcie; + PCIeController pcie; FlashController flashcontroller; InputParameter interface_ip; ProcParam procdynp; From a95e705481ff4fe219748607d72e7800d37577d6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 9 Jun 2020 23:30:04 -0500 Subject: [PATCH 11/59] 
refactor-serialization: Refactored NIUController Refactored the NIU Controller Module to separate the Area calculations from the Power Calculations --- src/CMakeLists.txt | 2 - src/iocontrollers.cc | 584 ------------------ src/iocontrollers/CMakeLists.txt | 2 + src/iocontrollers/niu_controller.cc | 333 ++++++++++ .../niu_controller.h} | 64 +- src/iocontrollers/pcie_controller.cc | 1 + src/processor.cc | 30 +- src/processor.h | 4 +- 8 files changed, 372 insertions(+), 648 deletions(-) delete mode 100644 src/iocontrollers.cc create mode 100644 src/iocontrollers/niu_controller.cc rename src/{iocontrollers.h => iocontrollers/niu_controller.h} (62%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4ca25c1..936314e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,8 +12,6 @@ add_library(top globalvar.h interconnect.h interconnect.cc - iocontrollers.h - iocontrollers.cc logic.h logic.cc noc.h diff --git a/src/iocontrollers.cc b/src/iocontrollers.cc deleted file mode 100644 index a6a86c5..0000000 --- a/src/iocontrollers.cc +++ /dev/null @@ -1,584 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ -#include "iocontrollers.h" - -#include "XML_Parse.h" -#include "basic_circuit.h" -#include "basic_components.h" -#include "const.h" -#include "io.h" -#include "logic.h" -#include "parameter.h" - -#include -#include -#include -#include -#include - -/* -SUN Niagara 2 I/O power analysis: -total signal bits: 711 -Total FBDIMM bits: (14+10)*2*8= 384 -PCIe bits: (8 + 8)*2 = 32 -10Gb NIC: (4*2+4*2)*2 = 32 -Debug I/Os: 168 -Other I/Os: 711- 32-32 - 384 - 168 = 95 - -According to "Implementation of an 8-Core, 64-Thread, Power-Efficient SPARC -Server on a Chip" 90% of I/Os are SerDers (the calucaltion is -384+64/(711-168)=83% about the same as the 90% reported in the paper) ---> around 80Pins are common I/Os. -Common I/Os consumes 71mW/Gb/s according to Cadence ChipEstimate @65nm -Niagara 2 I/O clock is 1/4 of core clock. 
--> 87pin (<--((711-168)*17%)) * -71mW/Gb/s *0.25*1.4Ghz = 2.17W - -Total dynamic power of FBDIMM, NIC, PCIe = 84*0.132 + 84*0.049*0.132 = 11.14 -- 2.17 = 8.98 Further, if assuming I/O logic power is about 50% of I/Os then -Total energy of FBDIMM, NIC, PCIe = 11.14 - 2.17*1.5 = 7.89 - */ - -/* - * A bug in Cadence ChipEstimator: After update the clock rate in the clock tab, - * a user need to re-select the IP clock (the same clk) and then click Estimate. - * if not reselect the new clock rate may not be propogate into the IPs. - * - */ - -NIUController::NIUController(ParseXML *XML_interface, - InputParameter *interface_ip_) - : XML(XML_interface), interface_ip(*interface_ip_) { - - double frontend_area, phy_area, mac_area, SerDer_area; - double frontend_dyn, mac_dyn, SerDer_dyn; - double frontend_gates, mac_gates, SerDer_gates = 0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - set_niu_param(); - local_result = init_interface(&interface_ip); - - if (niup.type == 0) // high performance NIU - { - // Area estimation based on average of die photo from Niagara 2 and Cadence - // ChipEstimate using 65nm. - mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); - // Area estimation based on average of die photo from Niagara 2, ISSCC "An - // 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" and"A 1.2-V-Only 900-mW - // 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning - // Technique" Frontend is PCS - frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * - (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); - // Area estimation based on average of die photo from Niagara 2 and Cadence - // ChipEstimate hard IP @65nm. 
SerDer is very hard to scale - SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um / - 0.065); //* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - // total area - area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); - // Power - // Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = - // P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * - (interface_ip.F_sz_nm / - 65.0); // niup.clockRate; //2.19W@1GHz fully active according to - // Cadence ChipEstimate @65nm - // Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * (interface_ip.F_sz_nm / 65.0); // niup.clockRate; - // according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 - // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; - SerDer_dyn /= - niup.clockRate; // covert to energy per clock cycle of whole NIU - - // Cadence ChipEstimate using 65nm - mac_gates = 111700; - frontend_gates = 320000; - SerDer_gates = 200000; - NMOS_sizing = 5 * g_tp.min_w_nmos_; - PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - - } else { // Low power implementations are mostly from Cadence ChipEstimator; - // Ignore the multiple IP effect - // ---When there are multiple IP (same kind or not) selected, Cadence - // ChipEstimator results are not a simple summation of all IPs. 
Ignore this - // effect - mac_area = - 0.24 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); - frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); // Frontend is the PCS layer - SerDer_area = - 0.35 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); - // Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet - // Transceiver and XAUI Interface With Robust VCO Tuning Technique" and the - // ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly - // with the technology total area - area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); - // Power - // Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = - // P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * - (interface_ip.F_sz_nm / - 65.0); // niup.clockRate; //2.19W@1GHz fully active according to - // Cadence ChipEstimate @65nm - // Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * (interface_ip.F_sz_nm / 65.0); // niup.clockRate; - // SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm - SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) * - g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; - SerDer_dyn /= - niup.clockRate; // covert to energy per clock cycle of whole NIU - - mac_gates = 111700; - frontend_gates = 52000; - SerDer_gates = 199260; - - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - } - - power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; - power_t.readOp.leakage = - (mac_gates + frontend_gates + frontend_gates) * - cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - double long_channel_device_reduction = - longer_channel_device_reduction(Uncore_device); - double pg_reduction = power_gating_leakage_reduction(false); - 
power_t.readOp.longer_channel_leakage = - power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = - power_t.readOp.power_gated_leakage * long_channel_device_reduction; - - power_t.readOp.gate_leakage = - (mac_gates + frontend_gates + frontend_gates) * - cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W -} - -void NIUController::computeEnergy(bool is_tdp) { - if (is_tdp) { - - power = power_t; - power.readOp.dynamic *= niup.duty_cycle; - - } else { - rt_power = power_t; - rt_power.readOp.dynamic *= niup.perc_load; - } -} - -void NIUController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << "NIU:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str - << "Peak Dynamic = " << power.readOp.dynamic * niup.clockRate << " W" - << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str - << "Runtime Dynamic = " << rt_power.readOp.dynamic * niup.clockRate - << " W" << endl; - cout << endl; - } else { - } -} - -void NIUController::set_niu_param() { - niup.clockRate = XML->sys.niu.clockrate; - niup.clockRate *= 1e6; - niup.num_units = XML->sys.niu.number_units; - niup.duty_cycle = XML->sys.niu.duty_cycle; - niup.perc_load = XML->sys.niu.total_load_perc; - niup.type = XML->sys.niu.type; - if (XML->sys.niu.vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.niu.vdd; - interface_ip.lop_Vdd = XML->sys.niu.vdd; - interface_ip.lstp_Vdd = XML->sys.niu.vdd; - } - - if (XML->sys.niu.power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.niu.power_gating_vcc; - } - // niup.executionTime = - // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -} - -#if 0 -PCIeController::PCIeController(ParseXML *XML_interface, - InputParameter *interface_ip_) - : XML(XML_interface), interface_ip(*interface_ip_) { - - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates, frontend_gates, SerDer_gates = 0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - set_pcie_param(); - local_result = init_interface(&interface_ip); - - if (pciep.type == 0) // high performance NIU - { - // Area estimation based on average of die photo from Niagara 2 and Cadence - // ChipEstimate @ 65nm. 
- ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); - // Area estimation based on average of die photo from Niagara 2, and Cadence - // ChipEstimate @ 65nm. - frontend_area = (5.2 + 0.1) / 2 * (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); - // Area estimation based on average of die photo from Niagara 2 and Cadence - // ChipEstimate hard IP @65nm. SerDer is very hard to scale - SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um / - 0.065); //* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - // total area - // Power - // Cadence ChipEstimate using 65nm the controller includes everything: the - // PHY, the data link and transaction layer - ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * (interface_ip.F_sz_nm / 65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = - // 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / - 1.2; // PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle - - // power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; - // Cadence ChipEstimate using 65nm - ctrl_gates = 900000 / 8 * pciep.num_channels; - // frontend_gates = 120000/8; - // SerDer_gates = 200000/8; - NMOS_sizing = 5 * g_tp.min_w_nmos_; - PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - } else { - ctrl_area = - 0.412 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); - // Area estimation based on average of die photo from Niagara 2, and Cadence - // ChipEstimate @ 65nm. 
- SerDer_area = - 0.36 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); - // total area - // Power - // Cadence ChipEstimate using 65nm the controller includes everything: the - // PHY, the data link and transaction layer - ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / - 1.1 * (interface_ip.F_sz_nm / 65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = - // 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / - 1.2; // PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate; // covert to energy per clock cycle - - // Cadence ChipEstimate using 65nm - ctrl_gates = 200000 / 8 * pciep.num_channels; - // frontend_gates = 120000/8; - SerDer_gates = 200000 / 8 * pciep.num_channels; - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - } - area.set_area(((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 * - pciep.num_channels) * - 1e6); - power_t.readOp.dynamic = - (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * pciep.num_channels; - power_t.readOp.leakage = - (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * - cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - double long_channel_device_reduction = - longer_channel_device_reduction(Uncore_device); - double pg_reduction = power_gating_leakage_reduction(false); - power_t.readOp.longer_channel_leakage = - power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = - power_t.readOp.power_gated_leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = - (ctrl_gates + (pciep.withPHY ? 
SerDer_gates : 0)) * - cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W -} - -void PCIeController::computeEnergy(bool is_tdp) { - if (is_tdp) { - - power = power_t; - power.readOp.dynamic *= pciep.duty_cycle; - - } else { - rt_power = power_t; - rt_power.readOp.dynamic *= pciep.perc_load; - } -} - -void PCIeController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << "PCIe:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str - << "Peak Dynamic = " << power.readOp.dynamic * pciep.clockRate << " W" - << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str - << "Runtime Dynamic = " << rt_power.readOp.dynamic * pciep.clockRate - << " W" << endl; - cout << endl; - } else { - } -} - -void PCIeController::set_pcie_param() { - pciep.clockRate = XML->sys.pcie.clockrate; - pciep.clockRate *= 1e6; - pciep.num_units = XML->sys.pcie.number_units; - pciep.num_channels = XML->sys.pcie.num_channels; - pciep.duty_cycle = XML->sys.pcie.duty_cycle; - pciep.perc_load = XML->sys.pcie.total_load_perc; - pciep.type = XML->sys.pcie.type; - pciep.withPHY = XML->sys.pcie.withPHY; - - if (XML->sys.pcie.vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.pcie.vdd; - interface_ip.lop_Vdd = XML->sys.pcie.vdd; - interface_ip.lstp_Vdd = XML->sys.pcie.vdd; - } - - if (XML->sys.pcie.power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.pcie.power_gating_vcc; - } - // pciep.executionTime = - // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -} -#endif - -#if 0 -FlashController::FlashController(ParseXML *XML_interface, - InputParameter *interface_ip_) - : XML(XML_interface), interface_ip(*interface_ip_) { - - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates, frontend_gates, SerDer_gates = 0.; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - - set_fc_param(); - local_result = init_interface(&interface_ip); - if (fcp.type == 0) // high performance NIU - { - cout << "Current McPAT does not 
support high performance flash contorller " - "since even low power designs are enough for maintain throughput" - << endl; - exit(0); - NMOS_sizing = 5 * g_tp.min_w_nmos_; - PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - } else { - ctrl_area = - 0.243 * (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); - // Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from - // CAST - SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) * - (interface_ip.F_sz_um / 0.065); - // based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support - // 8x lanes with each lane speed up to 250MB/s (PCIe1.1x) This is already - // saturate the 200MB/s of the flash controller core above. - ctrl_gates = 129267; - SerDer_gates = 200000 / 8; - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - - // Power - // Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s - // This is power not energy! - ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 * - (interface_ip.F_sz_nm / 65.0); - // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; - // max Per controller speed is 1.6Gb/s (200MB/s) - } - double number_channel = 1 + (fcp.num_channels - 1) * 0.2; - area.set_area((ctrl_area + (fcp.withPHY ? SerDer_area : 0)) * 1e6 * - number_channel); - power_t.readOp.dynamic = - (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) * number_channel; - power_t.readOp.leakage = - ((ctrl_gates + (fcp.withPHY ? 
SerDer_gates : 0)) * number_channel) * - cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - double long_channel_device_reduction = - longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = - power_t.readOp.leakage * long_channel_device_reduction; - double pg_reduction = power_gating_leakage_reduction( - false); // array structure all retain state; - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = - power_t.readOp.power_gated_leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = - ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) * - cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W -} - -void FlashController::computeEnergy(bool is_tdp) { - if (is_tdp) { - - power = power_t; - power.readOp.dynamic *= fcp.duty_cycle; - - } else { - rt_power = power_t; - rt_power.readOp.dynamic *= fcp.perc_load; - } -} - -void FlashController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << "Flash Controller:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" - << endl; // no multiply of clock since this is power already - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic - << " W" << endl; - cout << endl; - } else { - } -} - -void FlashController::set_fc_param() { - // fcp.clockRate = XML->sys.flashc.mc_clock; - // fcp.clockRate *= 1e6; - fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; - fcp.num_channels = ceil(fcp.peakDataTransferRate / 200); - fcp.num_mcs = XML->sys.flashc.number_mcs; - fcp.duty_cycle = XML->sys.flashc.duty_cycle; - fcp.perc_load = XML->sys.flashc.total_load_perc; - fcp.type = XML->sys.flashc.type; - fcp.withPHY = XML->sys.flashc.withPHY; - // flashcp.executionTime = - // XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - if (XML->sys.flashc.vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.flashc.vdd; - interface_ip.lop_Vdd = XML->sys.flashc.vdd; - interface_ip.lstp_Vdd = XML->sys.flashc.vdd; - } - if (XML->sys.flashc.power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; - } -} -#endif diff --git a/src/iocontrollers/CMakeLists.txt b/src/iocontrollers/CMakeLists.txt index 2ced58e..47b8e27 100644 --- a/src/iocontrollers/CMakeLists.txt +++ b/src/iocontrollers/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(iocontrollers flash_controller.cc pcie_controller.h pcie_controller.cc + niu_controller.h + niu_controller.cc ) target_include_directories(iocontrollers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(iocontrollers LINK_PUBLIC cacti top) diff --git a/src/iocontrollers/niu_controller.cc b/src/iocontrollers/niu_controller.cc new file mode 100644 index 0000000..a2be32d --- /dev/null +++ b/src/iocontrollers/niu_controller.cc @@ 
-0,0 +1,333 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + * Author: + * Andrew Smith + ***************************************************************************/ +#include "niu_controller.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* + * NIUController() + * Constructor, Initializes the member variables that are shared across + * methods. + */ +NIUController::NIUController() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; +} + +/* + * computeArea() + * Computes the component area based off of the input parameters in the XML. + * Side Effects: + * Sets the component area member to the calculated area. + * Input: + * None + * Output: + * None + */ +void NIUController::computeArea() { + double frontend_area = 0.0; + double phy_area = 0.0; + double SerDer_area = 0.0; + double mac_area = 0.0; + + if (!init_params) { + std::cerr << "[ NIUController ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + local_result = init_interface(&ip); + if (niup.type == 0) // high performance NIU + { + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate using 65nm. 
+ mac_area = (1.53 + 0.3) / 2 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2, ISSCC "An + // 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" and"A 1.2-V-Only 900-mW + // 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning + // Technique" Frontend is PCS + frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * + (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Area estimation based on average of die photo from Niagara 2 and Cadence + // ChipEstimate hard IP @65nm. SerDer is very hard to scale + SerDer_area = (1.39 + 0.36) * (ip.F_sz_um / 0.065); //* (ip.F_sz_um/0.065); + phy_area = frontend_area + SerDer_area; + // total area + area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); + + } else { // Low power implementations are mostly from Cadence ChipEstimator; + // Ignore the multiple IP effect + // ---When there are multiple IP (same kind or not) selected, Cadence + // ChipEstimator results are not a simple summation of all IPs. Ignore this + // effect + mac_area = 0.24 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + frontend_area = 0.1 * (ip.F_sz_um / 0.065) * + (ip.F_sz_um / 0.065); // Frontend is the PCS layer + SerDer_area = 0.35 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); + // Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet + // Transceiver and XAUI Interface With Robust VCO Tuning Technique" and the + // ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly + // with the technology total area + area.set_area((mac_area + frontend_area + SerDer_area) * 1e6); + } +} + +/* + * computeStaticPower() + * Computes the static power based off of the input parameters from the xml. + * It calculates leakage power, + * + * TODO: Add Vdd such that the static power & dynamic power can reflect + * changes in the chip power supply. 
+ * + * Side Effects: + * Sets the static power, leakage, and power gated leakage + * Input: + * None + * Output: + * None + */ +void NIUController::computeStaticPower() { + double frontend_dyn = 0.0; + double SerDer_dyn = 0.0; + double mac_dyn = 0.0; + double frontend_gates = 0.0; + double SerDer_gates = 0.0; + double mac_gates = 0.0; + double NMOS_sizing = 0.0; + double PMOS_sizing = 0.0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + + if (!init_params) { + std::cerr << "[ NIUController ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + if (niup.type == 0) // high performance NIU + { + // Power + // Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = + // P/F = 1.37/1Ghz = 1.37e-9); + mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * + (ip.F_sz_nm / 65.0); // niup.clockRate; //2.19W@1GHz fully active + // according to Cadence ChipEstimate @65nm + // Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (ip.F_sz_nm / 65.0); // niup.clockRate; + // according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 + // SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 10 * sqrt(ip.F_sz_um / 0.09) * g_tp.peri_global.Vdd / + 1.2 * g_tp.peri_global.Vdd / 1.2; + SerDer_dyn /= + niup.clockRate; // covert to energy per clock cycle of whole NIU + + // Cadence ChipEstimate using 65nm + mac_gates = 111700; + frontend_gates = 320000; + SerDer_gates = 200000; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { // Low power implementations are mostly from Cadence ChipEstimator; + // Power + // Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
E=P*T = + // P/F = 1.37/1Ghz = 1.37e-9); + mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * + (ip.F_sz_nm / 65.0); // niup.clockRate; //2.19W@1GHz fully active + // according to Cadence ChipEstimate @65nm + // Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (ip.F_sz_nm / 65.0); // niup.clockRate; + // SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm + SerDer_dyn = 0.0216 * 10 * (ip.F_sz_um / 0.13) * g_tp.peri_global.Vdd / + 1.2 * g_tp.peri_global.Vdd / 1.2; + SerDer_dyn /= + niup.clockRate; // covert to energy per clock cycle of whole NIU + + mac_gates = 111700; + frontend_gates = 52000; + SerDer_gates = 199260; + + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } + power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; + power_t.readOp.leakage = + (mac_gates + frontend_gates + frontend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; + power_t.readOp.gate_leakage = + (mac_gates + frontend_gates + frontend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W +} + +/* + * computeDynamicPower() + * Compute both Peak Power and Runtime Power based on the stats of the input + * xml. 
+ * Side Effects: + * Sets the runtime power, and the peak dynamic power in the component + * class + * Input: + * None + * Output: + * None + */ +void NIUController::computeDynamicPower() { + power = power_t; + power.readOp.dynamic *= niup.duty_cycle; + rt_power = power_t; + rt_power.readOp.dynamic *= niup.perc_load; +} + +/* + * display(uint32_t, bool) + * Display the Power, Area, and Timing results to the standard output + * Side Effects: + * None + * Input: + * indent - How far in to indent + * enable - toggle printing + * Output: + * None + */ +void NIUController::display(uint32_t indent, bool enable) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + + if (enable) { + cout << "NIU:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * niup.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic * niup.clockRate + << " W" << endl; + cout << endl; + } else { + } +} + +/* + * set_params(const ParseXML, InputParameter) + * Sets the parts of the flash controller params that contribute to area and + * static power. Must be called before computing area or static power. + * Side Effects: + * sets the interface_ip struct, and sets the params struct to the + * "params" from the xml file. Also sets init_params to true. 
+ * Input: + * *XML - Parsed XML + * *interface_ip - Interface from McPAT used in Cacti Library + * Output: + * None + */ +void NIUController::set_params(const ParseXML *XML, + InputParameter *interface_ip) { + ip = *interface_ip; + niup.clockRate = XML->sys.niu.clockrate; + niup.clockRate *= 1e6; + niup.num_units = XML->sys.niu.number_units; + niup.type = XML->sys.niu.type; + + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + + if (XML->sys.niu.vdd > 0) { + ip.specific_hp_vdd = true; + ip.specific_lop_vdd = true; + ip.specific_lstp_vdd = true; + ip.hp_Vdd = XML->sys.niu.vdd; + ip.lop_Vdd = XML->sys.niu.vdd; + ip.lstp_Vdd = XML->sys.niu.vdd; + } + + if (XML->sys.niu.power_gating_vcc > -1) { + ip.specific_vcc_min = true; + ip.user_defined_vcc_min = XML->sys.niu.power_gating_vcc; + } + init_params = true; +} + +/* + * set_stats(const ParseXML) + * Sets the parts of the flash controller params that contribute to dynamic + * power. + * Side Effects: + * Store duty cycle and and percentage load into fc params, sets + * init_stats to true + * Input: + * *XML - Parsed XML + * Output: + * None + */ +void NIUController::set_stats(const ParseXML *XML) { + niup.duty_cycle = XML->sys.niu.duty_cycle; + niup.perc_load = XML->sys.niu.total_load_perc; + init_stats = true; +} diff --git a/src/iocontrollers.h b/src/iocontrollers/niu_controller.h similarity index 62% rename from src/iocontrollers.h rename to src/iocontrollers/niu_controller.h index 3f7d072..f0d6bc1 100644 --- a/src/iocontrollers.h +++ b/src/iocontrollers/niu_controller.h @@ -27,63 +27,39 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * + * Author: + * Andrew Smith ***************************************************************************/ -#ifndef IOCONTROLLERS_H_ -#define IOCONTROLLERS_H_ - -#endif /* IOCONTROLLERS_H_ */ +#ifndef __NIU_CONTROLLER_H__ +#define 
__NIU_CONTROLLER_H__ #include "XML_Parse.h" -#include "parameter.h" -//#include "io.h" #include "array.h" -//#include "Undifferentiated_Core_Area.h" #include "basic_components.h" +#include "parameter.h" #include class NIUController : public Component { public: - ParseXML *XML; - InputParameter interface_ip; + InputParameter ip; NIUParam niup; powerDef power_t; uca_org_t local_result; - NIUController(ParseXML *XML_interface, InputParameter *interface_ip_); - void set_niu_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~NIUController(){}; -}; -#if 0 -class PCIeController : public Component { -public: - ParseXML *XML; - InputParameter interface_ip; - PCIeParam pciep; - powerDef power_t; - uca_org_t local_result; - PCIeController(ParseXML *XML_interface, InputParameter *interface_ip_); - void set_pcie_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~PCIeController(){}; -}; -#endif + NIUController(); + void set_params(const ParseXML *XML, InputParameter *interface_ip); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); -#if 0 -class FlashController : public Component { -public: - ParseXML *XML; - InputParameter interface_ip; - MCParam fcp; - powerDef power_t; - uca_org_t local_result; - FlashController(ParseXML *XML_interface, InputParameter *interface_ip_); - void set_fc_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~FlashController(){}; +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; }; -#endif + +#endif // __NIU_CONTROLLER_H__ diff --git a/src/iocontrollers/pcie_controller.cc b/src/iocontrollers/pcie_controller.cc index 5953280..b214086 100644 
--- a/src/iocontrollers/pcie_controller.cc +++ b/src/iocontrollers/pcie_controller.cc @@ -289,6 +289,7 @@ void PCIeController::set_params(const ParseXML *XML, pciep.num_channels = XML->sys.pcie.num_channels; pciep.type = XML->sys.pcie.type; pciep.withPHY = XML->sys.pcie.withPHY; + long_channel = XML->sys.longer_channel_device; power_gating = XML->sys.power_gating; diff --git a/src/processor.cc b/src/processor.cc index 33e6b0c..2412aab 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -47,7 +47,7 @@ Processor::Processor(ParseXML *XML_interface) : XML(XML_interface), // TODO: using one global copy may have problems. - mc(nullptr), niu(nullptr) { + mc(nullptr) { /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory @@ -359,26 +359,28 @@ Processor::Processor(ParseXML *XML_interface) } if (XML->sys.niu.number_units > 0) { - niu = new NIUController(XML, &interface_ip); - niu->computeEnergy(); - niu->computeEnergy(false); + niu.set_params(XML, &interface_ip); + niu.computeArea(); + niu.computeStaticPower(); nius.area.set_area(nius.area.get_area() + - niu->area.get_area() * XML->sys.niu.number_units); + niu.area.get_area() * XML->sys.niu.number_units); area.set_area(area.get_area() + - niu->area.get_area() * XML->sys.niu.number_units); + niu.area.get_area() * XML->sys.niu.number_units); set_pppm(pppm_t, - XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units * niu.niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units); - nius.power = niu->power * pppm_t; + niu.set_stats(XML); + niu.computeDynamicPower(); + nius.power = niu.power * pppm_t; power = power + nius.power; set_pppm(pppm_t, - XML->sys.niu.number_units * niu->niup.clockRate, + XML->sys.niu.number_units * niu.niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units); - nius.rt_power = niu->rt_power * 
pppm_t; + nius.rt_power = niu.rt_power * pppm_t; rt_power = rt_power + nius.rt_power; } @@ -834,7 +836,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; } if (XML->sys.niu.number_units > 0) { - cout << indent_str << "Total NIUs: " << niu->niup.num_units + cout << indent_str << "Total NIUs: " << niu.niup.num_units << " Network Interface Units " << endl; displayDeviceType(XML->sys.device_type, indent); cout << indent_str_next << "Area = " << nius.area.get_area() * 1e-6 @@ -936,7 +938,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << endl; } if (XML->sys.niu.number_units > 0) { - niu->displayEnergy(indent + 4, is_tdp); + niu.display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1113,8 +1115,4 @@ Processor::~Processor() { delete mc; mc = nullptr; } - if (niu) { - delete niu; - niu = nullptr; - } }; diff --git a/src/processor.h b/src/processor.h index 183dba1..41eb743 100644 --- a/src/processor.h +++ b/src/processor.h @@ -39,8 +39,8 @@ #include "core.h" #include "decoder.h" #include "flash_controller.h" -#include "iocontrollers.h" #include "memoryctrl.h" +#include "niu_controller.h" #include "noc.h" #include "parameter.h" #include "pcie_controller.h" @@ -59,7 +59,7 @@ class Processor : public Component { vector l2dirarray; vector nocs; MemoryController *mc; - NIUController *niu; + NIUController niu; PCIeController pcie; FlashController flashcontroller; InputParameter interface_ip; From 238afbf04c9b0a20593a0f41ef1fb1294cd5b917 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 10 Jun 2020 11:01:30 -0500 Subject: [PATCH 12/59] refactor-serialization: Refactored MC Backend Refactored the Flash Controller Module to separate the Area calculations from the Power Calculations --- src/CMakeLists.txt | 7 +- src/memoryctrl.cc | 110 ++---------- src/memoryctrl.h | 13 +- src/memoryctrl/CMakeLists.txt | 6 + 
src/memoryctrl/mc_backend.cc | 324 ++++++++++++++++++++++++++++++++++ src/memoryctrl/mc_backend.h | 72 ++++++++ src/processor.cc | 2 +- 7 files changed, 429 insertions(+), 105 deletions(-) create mode 100644 src/memoryctrl/CMakeLists.txt create mode 100644 src/memoryctrl/mc_backend.cc create mode 100644 src/memoryctrl/mc_backend.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 936314e..8c9c308 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(cacti) add_subdirectory(iocontrollers) +add_subdirectory(memoryctrl) add_library(top arch_const.h @@ -31,19 +32,17 @@ add_library(top XML_Parse.cc ) target_include_directories(top PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(top LINK_PUBLIC cacti iocontrollers) +target_link_libraries(top LINK_PUBLIC cacti iocontrollers memoryctrl) add_executable(mcpat main.cc) target_link_libraries(mcpat LINK_PUBLIC - cacti top - iocontrollers Threads::Threads Boost::program_options) add_custom_command(TARGET mcpat POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ ${PROJECT_BINARY_DIR}/mcpat COMMENT "Copying executable ${PROJECT_BINARY_DIR}/mcpat" - ) +) diff --git a/src/memoryctrl.cc b/src/memoryctrl.cc index e5d6205..51cc3c8 100644 --- a/src/memoryctrl.cc +++ b/src/memoryctrl.cc @@ -73,6 +73,7 @@ * */ +#if 0 MCBackend::MCBackend(InputParameter *interface_ip_, const MCParam &mcp_, enum MemoryCtrl_type mc_type_) @@ -221,6 +222,7 @@ void MCBackend::computeEnergy(bool is_tdp) { // refreshing and scrubbing } } +#endif MCPHY::MCPHY(InputParameter *interface_ip_, const MCParam &mcp_, @@ -701,7 +703,7 @@ MemoryController::MemoryController(ParseXML *XML_interface, InputParameter *interface_ip_, enum MemoryCtrl_type mc_type_) : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - frontend(0), transecEngine(0), PHY(0), pipeLogic(0) { + frontend(0), PHY(0), pipeLogic(0) { /* All computations are for a single MC * */ @@ -709,84 +711,43 @@ 
MemoryController::MemoryController(ParseXML *XML_interface, interface_ip.wire_os_mat_type = 2; interface_ip.wt = Global; set_mc_param(); + transecEngine.set_params(XML, mcp, &interface_ip, mc_type); + transecEngine.set_stats(mcp); + transecEngine.computeArea(); + transecEngine.computeStaticPower(); frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); area.set_area(area.get_area() + frontend->area.get_area()); - transecEngine = new MCBackend(&interface_ip, mcp, mc_type); - area.set_area(area.get_area() + transecEngine->area.get_area()); + area.set_area(area.get_area() + transecEngine.area.get_area()); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY = new MCPHY(&interface_ip, mcp, mc_type); area.set_area(area.get_area() + PHY->area.get_area()); } - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, - // Run the RTL code from OpenSparc. - // transecEngine.initialize(&interface_ip); - // transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; - // transecEngine.memDataWidth = dataBusWidth; - // transecEngine.memRank = XML->sys.mem.number_ranks; - // //transecEngine.memAccesses=XML->sys.mc.memory_accesses; - // //transecEngine.llcBlocksize=llcBlockSize; - // transecEngine.compute(); - // transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) - // ; area.set_area(area.get_area()+ transecEngine.area.get_area()); - // ///cout<<"area="<sys.mem.peak_transfer_rate; - // PHY.memDataWidth = dataBusWidth; - // //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - // //PHY.llcBlocksize=llcBlockSize; - // PHY.compute(); - // PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) - // ; area.set_area(area.get_area()+ PHY.area.get_area()); - /// cout<<"area="<sys.core[0].opcode_width + dataBusWidth; pipeLogic = new - // pipeline(is_default, &interface_ip); - // //pipeLogic.init_pipeline(is_default, &interface_ip); - // pipeLogic->compute_pipeline(); - 
// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); - // area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing - // overhead - // - // - //// //clock - //// clockNetwork.init_wire_external(is_default, &interface_ip); - //// clockNetwork.clk_area =area*1.1;//10% of placement overhead. - /// rule of thumb / clockNetwork.end_wiring_level =5;//toplevel metal / - /// clockNetwork.start_wiring_level =5;//toplevel metal / - /// clockNetwork.num_regs = pipeLogic.tot_stage_vector; / - /// clockNetwork.optimize_wire(); } void MemoryController::computeEnergy(bool is_tdp) { - frontend->computeEnergy(is_tdp); - transecEngine->computeEnergy(is_tdp); + transecEngine.computeDynamicPower(); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY->computeEnergy(is_tdp); } if (is_tdp) { - power = power + frontend->power + transecEngine->power; + power = power + frontend->power + transecEngine.power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { power = power + PHY->power; } } else { - rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; + rt_power = rt_power + frontend->rt_power + transecEngine.rt_power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { rt_power = rt_power + PHY->rt_power; } } } -void MemoryController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); bool long_channel = XML->sys.longer_channel_device; bool power_gating = XML->sys.power_gating; - - if (is_tdp) { + if (enable) { cout << "Memory Controller:" << endl; cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" << endl; @@ -808,6 +769,7 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << "Runtime Dynamic = " << rt_power.readOp.dynamic / mcp.executionTime << " W" << endl; cout << endl; + cout << indent_str << "Front End Engine:" << 
endl; cout << indent_str_next << "Area = " << frontend->area.get_area() * 1e-6 << " mm^2" << endl; @@ -832,33 +794,9 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << frontend->rt_power.readOp.dynamic / mcp.executionTime << " W" << endl; cout << endl; - if (plevel > 2) { - frontend->displayEnergy(indent + 4, is_tdp); - } - cout << indent_str << "Transaction Engine:" << endl; - cout << indent_str_next - << "Area = " << transecEngine->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << transecEngine->power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? transecEngine->power.readOp.longer_channel_leakage - : transecEngine->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? transecEngine->power.readOp - .power_gated_with_long_channel_leakage - : transecEngine->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << transecEngine->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; + transecEngine.display(indent, true); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + // PHY.display(indent, true); cout << indent_str << "PHY:" << endl; cout << indent_str_next << "Area = " << PHY->area.get_area() * 1e-6 << " mm^2" << endl; @@ -882,18 +820,6 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << PHY->rt_power.readOp.dynamic / mcp.executionTime << " W" << endl; cout << endl; } - } else { - cout << "Memory Controller:" << endl; - cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * 
mcp.clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage - << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << endl; - cout << endl; } } @@ -1007,10 +933,6 @@ MemoryController ::~MemoryController() { delete frontend; frontend = 0; } - if (transecEngine) { - delete transecEngine; - transecEngine = 0; - } if (PHY) { delete PHY; PHY = 0; diff --git a/src/memoryctrl.h b/src/memoryctrl.h index e942ead..bf2b8de 100644 --- a/src/memoryctrl.h +++ b/src/memoryctrl.h @@ -33,15 +33,15 @@ #define MEMORYCTRL_H_ #include "XML_Parse.h" -#include "logic.h" -#include "parameter.h" -//#include "io.h" #include "array.h" -//#include "Undifferentiated_Core_Area.h" #include "basic_components.h" +#include "logic.h" +#include "mc_backend.h" +#include "parameter.h" #include +#if 0 class MCBackend : public Component { public: InputParameter l_ip; @@ -60,6 +60,7 @@ class MCBackend : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~MCBackend(){}; }; +#endif class MCPHY : public Component { public: @@ -107,7 +108,7 @@ class MemoryController : public Component { enum MemoryCtrl_type mc_type; MCParam mcp; MCFrontEnd *frontend; - MCBackend *transecEngine; + MCBackend transecEngine; MCPHY *PHY; Pipeline *pipeLogic; @@ -117,7 +118,7 @@ class MemoryController : public Component { enum MemoryCtrl_type mc_type_); void set_mc_param(); void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool enable = true); ~MemoryController(); }; #endif /* MEMORYCTRL_H_ */ diff --git a/src/memoryctrl/CMakeLists.txt b/src/memoryctrl/CMakeLists.txt new file mode 100644 index 0000000..50c5e99 --- /dev/null +++ b/src/memoryctrl/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(memoryctrl + mc_backend.h + mc_backend.cc +) 
+target_include_directories(memoryctrl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(memoryctrl LINK_PUBLIC cacti top) diff --git a/src/memoryctrl/mc_backend.cc b/src/memoryctrl/mc_backend.cc new file mode 100644 index 0000000..3a4b5f9 --- /dev/null +++ b/src/memoryctrl/mc_backend.cc @@ -0,0 +1,324 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ +#include "mc_backend.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* + * FlashController() + * Constructor, Initializes the member variables that are shared across + * methods. + */ +MCBackend::MCBackend() { + long_channel = false; + power_gating = false; + mc_type = MC; +} + +/* + * computeArea() + * Computes the component area based off of the input parameters in the XML. + * Side Effects: + * Sets the component area member to the calculated area. 
+ * Input: + * None + * Output: + * None + */ +void MCBackend::computeArea() { + if (!init_params) { + std::cerr << "[ MCBackend ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + local_result = init_interface(&ip); + if (mc_type == MC) { + if (mcp.type == 0) { + // area = + // (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(ip.F_sz_um/0.09); + area.set_area((2.7927 * log(mcp.peakDataTransferRate * 2) - 19.862) / + 2.0 * mcp.dataBusWidth / 128.0 * (ip.F_sz_um / 0.09) * + mcp.num_channels * 1e6); // um^2 + } else { + area.set_area(0.15 * mcp.dataBusWidth / 72.0 * (ip.F_sz_um / 0.065) * + (ip.F_sz_um / 0.065) * mcp.num_channels * 1e6); // um^2 + } + } else { // skip old model + std::cerr << "[ MCBackend ] Error: Unknown memory controllers" << std::endl; + exit(1); + } +} + +/* + * computeStaticPower() + * Computes the static power based off of the input parameters from the xml. + * It calculates leakage power, + * + * TODO: Add Vdd such that the static power & dynamic power can reflect + * changes in the chip power supply. + * + * Side Effects: + * Sets the static power, leakage, and power gated leakage + * Input: + * None + * Output: + * None + */ +void MCBackend::computeStaticPower() { + // double max_row_addr_width = 20.0;//Current address 12~18bits + double C_MCB = 0.0; + double mc_power = 0.0; + double backend_dyn = 0.0; + double backend_gates = 0.0; + // double refresh_period = 0.0; + // double refresh_freq = 0.0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing = 0.0; + double PMOS_sizing = 0.0; + if (!init_params) { + std::cerr << "[ MCBackend ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + local_result = init_interface(&ip); + if (mc_type == MC) { + if (mcp.type == 0) { + // assuming the approximately same scaling factor as seen in processors. 
+ // C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode + // processor which has a very basic mc on chip. C_MCB + // = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power + // numbers.The base power (W) is divided by device frequency and vdd and + // scale to target process. mc_power = 0.0291*2;//29.1mW@200MHz @130nm + // From Power Analysis of SystemLevel OnChip Communication Architectures + // by Lahiri et + mc_power = + 4.32 * + 0.1; // 4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend + C_MCB = mc_power / 1e9 / 72 / 1.1 / 1.1 * ip.F_sz_um / 0.065; + power_t.readOp.dynamic = + C_MCB * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * + (mcp.dataBusWidth /*+mcp.addressBusWidth*/); // per access energy in + // memory controller + power_t.readOp.leakage = + area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(g_tp.min_w_nmos_, + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; // unit W + power_t.readOp.gate_leakage = + area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(g_tp.min_w_nmos_, + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; // unit W + + } else { + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + backend_dyn = + 0.9e-9 / 800e6 * mcp.clockRate / 12800 * mcp.peakDataTransferRate * + mcp.dataBusWidth / 72.0 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * + (ip.F_sz_nm / 65.0); // Average on DDR2/3 protocol controller and + // DDRC 1600/800A in Cadence ChipEstimate + // Scaling to technology and DIMM feature. 
The base IP support + // DDR3-1600(PC3 12800) + backend_gates = 50000 * mcp.dataBusWidth / + 64.0; // 50000 is from Cadence ChipEstimator + + power_t.readOp.dynamic = backend_dyn; + power_t.readOp.leakage = + (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + power_t.readOp.gate_leakage = + (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + } + } else { // skip old model + std::cerr << "[ MCBackend ] Error: Unknown memory controllers" << std::endl; + exit(1); + } + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; +} + +/* + * computeDynamicPower() + * Compute both Peak Power and Runtime Power based on the stats of the input + * xml. 
+ * Side Effects: + * Sets the runtime power, and the peak dynamic power in the component + * class + * Input: + * None + * Output: + * None + */ +void MCBackend::computeDynamicPower() { + if (!init_stats) { + std::cerr << "[ MCBackend ] Error: must set stats before calling " + "computeDynamicPower()\n"; + exit(1); + } + // backend uses internal data buswidth + stats_t.readAc.access = 0.5 * mcp.num_channels; + stats_t.writeAc.access = 0.5 * mcp.num_channels; + tdp_stats = stats_t; + + stats_t.readAc.access = mcp.reads; + stats_t.writeAc.access = mcp.writes; + rtp_stats = stats_t; + power = power_t; + power.readOp.dynamic = (tdp_stats.readAc.access + tdp_stats.writeAc.access) * + power_t.readOp.dynamic; + + rt_power.readOp.dynamic = + (rtp_stats.readAc.access + rtp_stats.writeAc.access) * mcp.llcBlockSize * + 8.0 / mcp.dataBusWidth * power_t.readOp.dynamic; + rt_power = rt_power + power_t * pppm_lkg; + rt_power.readOp.dynamic = + rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * + mcp.num_mcs * mcp.executionTime; + // Assume 10% of peak power is consumed by routine job including memory + // refreshing and scrubbing +} + +/* + * set_params(const ParseXML, + * const MCParam&, + * InputParameter, + * const enum MemoryCtrl_type) + * Sets the parts of the flash controller params that contribute to area and + * static power. Must be called before computing area or static power. + * Side Effects: + * sets the interface_ip struct, and sets the params struct to the + * "params" from the xml file. Also sets init_params to true. 
+ * Input: + * *XML - Parsed XML + * &MCParam - Parsed memory controller object from parent + * *interface_ip - Interface from McPAT used in Cacti Library + * MemoryCtrl_type - enum for type of memory controller + * Output: + * None + */ +void MCBackend::set_params(const ParseXML *XML, + const MCParam &mcp_, + InputParameter *interface_ip, + const enum MemoryCtrl_type mc_type_) { + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + mcp = mcp_; + ip = *interface_ip; + mc_type = mc_type_; + init_params = true; +} + +/* + * set_stats(const MCParam&) + * Sets the parts of the flash controller params that contribute to dynamic + * power. + * Side Effects: + * Store duty cycle and and percentage load into fc params, sets + * init_stats to true + * Input: + * MCParam - Parent Parsed MCParam Object + * Output: + * None + */ +void MCBackend::set_stats(const MCParam &mcp_) { + mcp = mcp_; + init_stats = true; +} + +/* + * display(uint32_t, bool) + * Display the Power, Area, and Timing results to the standard output + * Side Effects: + * None + * Input: + * indent - How far in to indent + * enable - toggle printing + * Output: + * None + */ +void MCBackend::display(uint32_t indent, bool enable) { + std::string indent_str(indent, ' '); + std::string indent_str_next(indent + 2, ' '); + + if (enable) { + std::cout << indent_str << "Transaction Engine:" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate + << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / mcp.executionTime << " W" + << std::endl; + std::cout << std::endl; + } +} diff --git a/src/memoryctrl/mc_backend.h b/src/memoryctrl/mc_backend.h new file mode 100644 index 0000000..2e5f8cd --- /dev/null +++ b/src/memoryctrl/mc_backend.h @@ -0,0 +1,72 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __MC_BACKEND_H__ +#define __MC_BACKEND_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "logic.h" +#include "parameter.h" + +#include + +class MCBackend : public Component { +public: + InputParameter ip; + uca_org_t local_result; + enum MemoryCtrl_type mc_type; + MCParam mcp; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + MCBackend(); + void set_params(const ParseXML *XML, + const MCParam &mcp_, + InputParameter *interface_ip, + const enum MemoryCtrl_type mc_type_); + void set_stats(const MCParam &mcp_); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); + ~MCBackend(){}; + +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; +}; + +#endif // __MC_BACKEND_H__ diff --git a/src/processor.cc b/src/processor.cc index 2412aab..3bb86bf 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -925,7 +925,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - mc->displayEnergy(indent + 4, is_tdp); + mc->displayEnergy(indent + 4); cout << "**************************************************************" "***************************" << endl; From 
c729f9346f2eb5951d453f575273da96814fcd9e Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 10 Jun 2020 15:52:07 -0500 Subject: [PATCH 13/59] refactor-serialization: Refactored MC PHY Refactored the MC PHY Module to separate the Area calculations from the Power Calculations --- src/memoryctrl.cc | 48 ++--- src/memoryctrl.h | 5 +- src/memoryctrl/CMakeLists.txt | 2 + src/memoryctrl/mc_backend.cc | 6 +- src/memoryctrl/mc_phy.cc | 337 ++++++++++++++++++++++++++++++++++ src/memoryctrl/mc_phy.h | 72 ++++++++ 6 files changed, 432 insertions(+), 38 deletions(-) create mode 100644 src/memoryctrl/mc_phy.cc create mode 100644 src/memoryctrl/mc_phy.h diff --git a/src/memoryctrl.cc b/src/memoryctrl.cc index 51cc3c8..5fc0b72 100644 --- a/src/memoryctrl.cc +++ b/src/memoryctrl.cc @@ -224,6 +224,7 @@ void MCBackend::computeEnergy(bool is_tdp) { } #endif +#if 0 MCPHY::MCPHY(InputParameter *interface_ip_, const MCParam &mcp_, enum MemoryCtrl_type mc_type_) @@ -365,6 +366,7 @@ void MCPHY::computeEnergy(bool is_tdp) { mcp.num_mcs * mcp.executionTime; } } +#endif MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, InputParameter *interface_ip_, @@ -703,7 +705,7 @@ MemoryController::MemoryController(ParseXML *XML_interface, InputParameter *interface_ip_, enum MemoryCtrl_type mc_type_) : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - frontend(0), PHY(0), pipeLogic(0) { + frontend(0), pipeLogic(0) { /* All computations are for a single MC * */ @@ -719,25 +721,29 @@ MemoryController::MemoryController(ParseXML *XML_interface, area.set_area(area.get_area() + frontend->area.get_area()); area.set_area(area.get_area() + transecEngine.area.get_area()); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY = new MCPHY(&interface_ip, mcp, mc_type); - area.set_area(area.get_area() + PHY->area.get_area()); + PHY.set_params(XML, mcp, &interface_ip, mc_type); + PHY.set_stats(mcp); + PHY.computeArea(); + PHY.computeStaticPower(); + area.set_area(area.get_area() + 
PHY.area.get_area()); } } + void MemoryController::computeEnergy(bool is_tdp) { frontend->computeEnergy(is_tdp); transecEngine.computeDynamicPower(); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY->computeEnergy(is_tdp); + PHY.computeDynamicPower(); } if (is_tdp) { power = power + frontend->power + transecEngine.power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - power = power + PHY->power; + power = power + PHY.power; } } else { rt_power = rt_power + frontend->rt_power + transecEngine.rt_power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - rt_power = rt_power + PHY->rt_power; + rt_power = rt_power + PHY.rt_power; } } } @@ -796,35 +802,12 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { cout << endl; transecEngine.display(indent, true); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - // PHY.display(indent, true); - cout << indent_str << "PHY:" << endl; - cout << indent_str_next << "Area = " << PHY->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << PHY->power.readOp.dynamic * mcp.clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? PHY->power.readOp.longer_channel_leakage - : PHY->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
PHY->power.readOp.power_gated_with_long_channel_leakage - : PHY->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << PHY->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << PHY->rt_power.readOp.dynamic / mcp.executionTime << " W" << endl; - cout << endl; + PHY.display(indent, true); } } } void MemoryController::set_mc_param() { - if (mc_type == MC) { mcp.clockRate = XML->sys.mc.mc_clock * 2; // DDR double pumped mcp.clockRate *= 1e6; @@ -928,15 +911,10 @@ MCFrontEnd ::~MCFrontEnd() { } MemoryController ::~MemoryController() { - if (frontend) { delete frontend; frontend = 0; } - if (PHY) { - delete PHY; - PHY = 0; - } if (pipeLogic) { delete pipeLogic; pipeLogic = 0; diff --git a/src/memoryctrl.h b/src/memoryctrl.h index bf2b8de..39892cd 100644 --- a/src/memoryctrl.h +++ b/src/memoryctrl.h @@ -37,6 +37,7 @@ #include "basic_components.h" #include "logic.h" #include "mc_backend.h" +#include "mc_phy.h" #include "parameter.h" #include @@ -62,6 +63,7 @@ class MCBackend : public Component { }; #endif +#if 0 class MCPHY : public Component { public: InputParameter l_ip; @@ -80,6 +82,7 @@ class MCPHY : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~MCPHY(){}; }; +#endif class MCFrontEnd : public Component { public: @@ -109,7 +112,7 @@ class MemoryController : public Component { MCParam mcp; MCFrontEnd *frontend; MCBackend transecEngine; - MCPHY *PHY; + MCPHY PHY; Pipeline *pipeLogic; // clock_network clockNetwork; diff --git a/src/memoryctrl/CMakeLists.txt b/src/memoryctrl/CMakeLists.txt index 50c5e99..acae0ec 100644 --- a/src/memoryctrl/CMakeLists.txt +++ b/src/memoryctrl/CMakeLists.txt @@ -1,6 +1,8 @@ add_library(memoryctrl mc_backend.h mc_backend.cc + mc_phy.h + mc_phy.cc ) target_include_directories(memoryctrl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(memoryctrl LINK_PUBLIC cacti top) diff --git 
a/src/memoryctrl/mc_backend.cc b/src/memoryctrl/mc_backend.cc index 3a4b5f9..5d87df5 100644 --- a/src/memoryctrl/mc_backend.cc +++ b/src/memoryctrl/mc_backend.cc @@ -28,6 +28,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ + #include "mc_backend.h" #include "XML_Parse.h" @@ -45,13 +46,15 @@ #include /* - * FlashController() + * MCBackend() * Constructor, Initializes the member variables that are shared across * methods. */ MCBackend::MCBackend() { long_channel = false; power_gating = false; + init_params = false; + init_stats = false; mc_type = MC; } @@ -120,7 +123,6 @@ void MCBackend::computeStaticPower() { "computeStaticPower()\n"; exit(1); } - local_result = init_interface(&ip); if (mc_type == MC) { if (mcp.type == 0) { // assuming the approximately same scaling factor as seen in processors. diff --git a/src/memoryctrl/mc_phy.cc b/src/memoryctrl/mc_phy.cc new file mode 100644 index 0000000..52f73f5 --- /dev/null +++ b/src/memoryctrl/mc_phy.cc @@ -0,0 +1,337 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "mc_phy.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* + * MCPHY() + * Constructor, Initializes the member variables that are shared across + * methods. + */ +MCPHY::MCPHY() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; + mc_type = MC; +} + +/* + * computeArea() + * Computes the component area based off of the input parameters in the XML. + * Side Effects: + * Sets the component area member to the calculated area. + * Input: + * None + * Output: + * None + */ +void MCPHY::computeArea() { + if (!init_params) { + std::cerr << "[ MCPHY ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + local_result = init_interface(&ip); + if (mc_type == MC) { + if (mcp.type == 0) { + // Based on die photos from Niagara 1 and 2. + // TODO merge this into undifferentiated core.PHY only achieves square + // root of the ideal scaling. 
area = + // (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(ip.F_sz_um/0.09); + area.set_area((6.4323 * log(mcp.peakDataTransferRate * 2) - 48.134) * + mcp.dataBusWidth / 128.0 * (ip.F_sz_um / 0.09) * + mcp.num_channels * 1e6 / 2); // TODO:/2 + } else { + double non_IO_percentage = 0.2; + area.set_area(1.3 * non_IO_percentage / 2133.0e6 * mcp.clockRate / 17066 * + mcp.peakDataTransferRate * mcp.dataBusWidth / 16.0 * + (ip.F_sz_um / 0.040) * (ip.F_sz_um / 0.040) * + mcp.num_channels * 1e6); // um^2 + } + } else { + area.set_area(0.4e6 / 2 * mcp.dataBusWidth / + 8); // area based on Cadence ChipEstimator for 8bit bus + } +} + +/* + * computeStaticPower() + * Computes the static power based off of the input parameters from the xml. + * It calculates leakage power, + * + * TODO: Add Vdd such that the static power & dynamic power can reflect + * changes in the chip power supply. + * + * Side Effects: + * Sets the static power, leakage, and power gated leakage + * Input: + * None + * Output: + * None + */ +void MCPHY::computeStaticPower() { + // PHY uses internal data buswidth but the actuall off-chip datawidth is + // 64bits + ecc + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + /* + * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation + * memory interfaces ," ISSCC 2006; From Cadence ChipEstimator for normal I/O + * around 0.4~0.8 mW/Gb/s + */ + double power_per_gb_per_s = 0.0; + double phy_gates = 0.0; + double NMOS_sizing = 0.0; + double PMOS_sizing = 0.0; + + if (!init_params) { + std::cerr << "[ MCPHY ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + + if (mcp.type == 0 && mc_type == MC) { + power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04; + // This is from curve fitting based on Niagara 1 and 2's PHY die photo. + // This is power not energy, 10mw/Gb/s @90nm for each channel and scaling + // down power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change + // from Bytes to bits. 
+ power_t.readOp.dynamic = power_per_gb_per_s * sqrt(ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / + 1.2; + power_t.readOp.leakage = + area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(g_tp.min_w_nmos_, + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; // unit W + power_t.readOp.gate_leakage = + area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(g_tp.min_w_nmos_, + g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd; // unit W + + } else { + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + // Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto + // DDR3 2133 (PC3 17066) + phy_gates = 200000 * mcp.dataBusWidth / 64.0; + power_per_gb_per_s = 0.01; + // This is power not energy, 10mw/Gb/s @90nm for each channel and scaling + // down + power_t.readOp.dynamic = power_per_gb_per_s * (ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / + 1.2; + power_t.readOp.leakage = + (mcp.withPHY ? phy_gates : 0) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + power_t.readOp.gate_leakage = + (mcp.withPHY ? 
phy_gates : 0) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd; // unit W + } + // double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power + // numbers are based on 72 bit DIMM interface power_t.readOp.dynamic *= + // phy_factor; power_t.readOp.leakage *= phy_factor; + // power_t.readOp.gate_leakage *= phy_factor; + + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power_t.readOp.longer_channel_leakage = + power_t.readOp.leakage * long_channel_device_reduction; + + double pg_reduction = power_gating_leakage_reduction(false); + power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; + power_t.readOp.power_gated_with_long_channel_leakage = + power_t.readOp.power_gated_leakage * long_channel_device_reduction; +} + +/* + * computeDynamicPower() + * Compute both Peak Power and Runtime Power based on the stats of the input + * xml. + * Side Effects: + * Sets the runtime power, and the peak dynamic power in the component + * class + * Input: + * None + * Output: + * None + */ +void MCPHY::computeDynamicPower() { + if (!init_stats) { + std::cerr << "[ MCPHY ] Error: must set stats before calling " + "computeDynamicPower()\n"; + exit(1); + } + // init stats for Peak + stats_t.readAc.access = 0.5 * mcp.num_channels; // time share on buses + stats_t.writeAc.access = 0.5 * mcp.num_channels; + tdp_stats = stats_t; + + // init stats for runtime power (RTP) + stats_t.readAc.access = mcp.reads; + stats_t.writeAc.access = mcp.writes; + rtp_stats = stats_t; + double data_transfer_unit = (mc_type == MC) ? 
72 : 16; /*DIMM data width*/ + power = power_t; + power.readOp.dynamic = + power.readOp.dynamic * + (mcp.peakDataTransferRate * 8 * 1e6 / 1e9 /*change to Gbs*/) * + mcp.dataBusWidth / data_transfer_unit * mcp.num_channels / mcp.clockRate; + // divide by clock rate is for match the final computation where *clock is + // used + //(tdp_stats.readAc.access*power_t.readOp.dynamic+ + // tdp_stats.writeAc.access*power_t.readOp.dynamic); + + rt_power = power_t; + // rt_power.readOp.dynamic = + // (rtp_stats.readAc.access*power_t.readOp.dynamic+ + // rtp_stats.writeAc.access*power_t.readOp.dynamic); + + rt_power.readOp.dynamic = + power_t.readOp.dynamic * + (rtp_stats.readAc.access + rtp_stats.writeAc.access) * + (mcp.llcBlockSize) * 8 / 1e9 / mcp.executionTime * (mcp.executionTime); + rt_power.readOp.dynamic = + rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * + mcp.num_mcs * mcp.executionTime; +} + +/* + * set_params(const ParseXML, + * const MCParam&, + * InputParameter, + * const enum MemoryCtrl_type) + * Sets the parts of the flash controller params that contribute to area and + * static power. Must be called before computing area or static power. + * Side Effects: + * sets the interface_ip struct, and sets the params struct to the + * "params" from the xml file. Also sets init_params to true. 
+ * Input: + * *XML - Parsed XML + * &MCParam - Parsed memory controller object from parent + * *interface_ip - Interface from McPAT used in Cacti Library + * MemoryCtrl_type - enum for type of memory controller + * Output: + * None + */ +void MCPHY::set_params(const ParseXML *XML, + const MCParam &mcp_, + InputParameter *interface_ip, + const enum MemoryCtrl_type mc_type_) { + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + mcp = mcp_; + ip = *interface_ip; + mc_type = mc_type_; + init_params = true; +} + +/* + * set_stats(const MCParam&) + * Sets the parts of the flash controller params that contribute to dynamic + * power. + * Side Effects: + * Store duty cycle and and percentage load into fc params, sets + * init_stats to true + * Input: + * MCParam - Parent Parsed MCParam Object + * Output: + * None + */ +void MCPHY::set_stats(const MCParam &mcp_) { + mcp = mcp_; + init_stats = true; +} + +/* + * display(uint32_t, bool) + * Display the Power, Area, and Timing results to the standard output + * Side Effects: + * None + * Input: + * indent - How far in to indent + * enable - toggle printing + * Output: + * None + */ +void MCPHY::display(uint32_t indent, bool enable) { + std::string indent_str(indent, ' '); + std::string indent_str_next(indent + 2, ' '); + + if (enable) { + std::cout << indent_str << "PHY:" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate + << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / mcp.executionTime << " W" + << std::endl; + std::cout << std::endl; + } +} diff --git a/src/memoryctrl/mc_phy.h b/src/memoryctrl/mc_phy.h new file mode 100644 index 0000000..39c5cd1 --- /dev/null +++ b/src/memoryctrl/mc_phy.h @@ -0,0 +1,72 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __MC_PHY_H__ +#define __MC_PHY_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "logic.h" +#include "parameter.h" + +#include + +class MCPHY : public Component { +public: + InputParameter ip; + uca_org_t local_result; + enum MemoryCtrl_type mc_type; + MCParam mcp; + statsDef tdp_stats; + statsDef rtp_stats; + statsDef stats_t; + powerDef power_t; + MCPHY(); + void set_params(const ParseXML *XML, + const MCParam &mcp_, + InputParameter *interface_ip, + const enum MemoryCtrl_type mc_type_); + void set_stats(const MCParam &mcp_); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); + ~MCPHY(){}; + +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; +}; + +#endif // __MC_PHY_H__ From 7118de30943949f0a1f036b728f6836cb0b33132 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 10 Jun 2020 21:25:06 -0500 Subject: [PATCH 14/59] refactor-serialization: Split up the Memory Controller Files --- src/CMakeLists.txt | 2 - src/array.cc | 54 ++ src/array.h | 18 +- src/logic.cc | 1 + src/memoryctrl.cc | 922 ------------------------------ src/memoryctrl/CMakeLists.txt | 4 + src/memoryctrl/mc_backend.cc | 3 +- src/memoryctrl/mc_frontend.cc | 399 +++++++++++++ src/memoryctrl/mc_frontend.h | 63 ++ 
src/memoryctrl/memoryctrl.cc | 274 +++++++++ src/{ => memoryctrl}/memoryctrl.h | 69 +-- 11 files changed, 813 insertions(+), 996 deletions(-) delete mode 100644 src/memoryctrl.cc create mode 100644 src/memoryctrl/mc_frontend.cc create mode 100644 src/memoryctrl/mc_frontend.h create mode 100644 src/memoryctrl/memoryctrl.cc rename src/{ => memoryctrl}/memoryctrl.h (61%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c9c308..3b97b8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,8 +17,6 @@ add_library(top logic.cc noc.h noc.cc - memoryctrl.h - memoryctrl.cc options.h options.cc processor.h diff --git a/src/array.cc b/src/array.cc index 55cb2e1..d154378 100644 --- a/src/array.cc +++ b/src/array.cc @@ -78,6 +78,60 @@ void ArrayST::compute_base_power() { // } } +/* + * set_params(const InputParameter, + * string _name, + * enum Device_ty, + * bool opt_local_, + * enum Core_type core_ty, + * bool _is_default) + * Set the member variables so that optimization can be clalled in + * computeArea() + * Side Effects: + * Sets all the same member variables as the constructor + * Inputs: + * configure_interface - InputParameter + * _name - device name + * device_ty_ - device type + * opt_local - Optimization flag + * Outputs: + * None + */ +void ArrayST::set_params(const InputParameter *configure_interface, + string _name, + enum Device_ty device_ty_, + bool opt_local_, + enum Core_type core_ty_, + bool _is_default) { + l_ip = *configure_interface; + name = _name; + device_ty = device_ty_; + opt_local = opt_local_; + core_ty = core_ty_; + is_default = _is_default; + + if (l_ip.cache_sz < 64) + l_ip.cache_sz = 64; + if (l_ip.power_gating && (l_ip.assoc == 0)) { + l_ip.power_gating = false; + } + + l_ip.error_checking(); // not only do the error checking but also fill some + // missing parameters +} + +/* + * computeArea() + * wrapper around optimize array + * SideEffects: + * Optimizes the array area and power + * Inputs: + * None + * Output: + * None 
+ */ +void ArrayST::computeArea() { optimize_array(); } + void ArrayST::optimize_array() { list candidate_solutions(0); list::iterator candidate_iter, min_dynamic_energy_iter; diff --git a/src/array.h b/src/array.h index 4431f01..6873cf0 100644 --- a/src/array.h +++ b/src/array.h @@ -29,8 +29,8 @@ * ***************************************************************************/ -#ifndef ARRAY_H_ -#define ARRAY_H_ +#ifndef __ARRAY_H__ +#define __ARRAY_H__ #include "basic_components.h" #include "cacti_interface.h" @@ -66,10 +66,18 @@ class ArrayST : public Component { statsDef stats_t; powerDef power_t; - virtual void optimize_array(); - virtual void compute_base_power(); + virtual void set_params(const InputParameter *configure_interface, + string _name, + enum Device_ty device_ty_, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, + bool _is_default = true); + virtual void computeArea(); virtual ~ArrayST(); +protected: + virtual void optimize_array(); + virtual void compute_base_power(); void leakage_feedback(double temperature); }; @@ -118,4 +126,4 @@ class DataCache : public InstCache { }; }; -#endif /* TLB_H_ */ +#endif /* __ARRAY_H__ */ diff --git a/src/logic.cc b/src/logic.cc index f782c9c..cff4164 100644 --- a/src/logic.cc +++ b/src/logic.cc @@ -1326,6 +1326,7 @@ void inst_decoder::inst_decoder_delay_power() { squencer_passes * num_decoder_segments); power = power + final_dec->power * pppm_t; } + void inst_decoder::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&l_ip); // init_result is dummy diff --git a/src/memoryctrl.cc b/src/memoryctrl.cc deleted file mode 100644 index 5fc0b72..0000000 --- a/src/memoryctrl.cc +++ /dev/null @@ -1,922 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ -#include "memoryctrl.h" - -#include "XML_Parse.h" -#include "basic_circuit.h" -#include "basic_components.h" -#include "const.h" -#include "io.h" -#include "logic.h" -#include "parameter.h" - -#include -#include -#include -#include -#include - -/* overview of MC models: - * McPAT memory controllers are modeled according to large number of industrial - * data points. 
The Basic memory controller architecture is base on the Synopsis - * designs (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite - * protocol controllers) as in Cadence ChipEstimator Tool - * - * An MC has 3 parts as shown in this design. McPAT models both high performance - * MC based on Niagara processor designs and curving and low power MC based on - * data points in Cadence ChipEstimator Tool. - * - * The frontend is modeled analytically, the backend is modeled empirically - * according to DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator - * Tool The PHY is modeled based on "A 100mW 9.6Gb/s Transceiver in 90nm CMOS - * for next-generation memory interfaces ," ISSCC 2006, and A 14mW 6.25Gb/s - * Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 - * - * In Cadence ChipEstimator Tool there are two types of memory controllers: the - * full memory controllers that includes the frontend as the DesignWare - * DDR2/DDR3-Lite memory controllers and the backend only memory controllers as - * the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite - * memory controllers, all memory controller IP in Cadence ChipEstimator Tool - * are backend memory controllers such as DDRC 1600A and DDRC 800A). 
Thus,to - * some extend the area and power difference between DesignWare DDR2/DDR3-Lite - * memory controllers and DDR2/DDR3-Lite protocol controllers can be an - * estimation to the frontend power and area, which is very close the - * analitically modeled results of the frontend for Niagara2@65nm - * - */ - -#if 0 -MCBackend::MCBackend(InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_) - : l_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_) { - - local_result = init_interface(&l_ip); - compute(); -} - -void MCBackend::compute() { - // double max_row_addr_width = 20.0;//Current address 12~18bits - double C_MCB, mc_power, backend_dyn, - backend_gates; //, refresh_period,refresh_freq;//Equivalent per bit Cap - // for backend, - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) { - if (mcp.type == 0) { - // area = - // (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); - area.set_area((2.7927 * log(mcp.peakDataTransferRate * 2) - 19.862) / - 2.0 * mcp.dataBusWidth / 128.0 * (l_ip.F_sz_um / 0.09) * - mcp.num_channels * 1e6); // um^2 - // assuming the approximately same scaling factor as seen in processors. - // C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode - // processor which has a very basic mc on chip. C_MCB - // = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power - // numbers.The base power (W) is divided by device frequency and vdd and - // scale to target process. 
mc_power = 0.0291*2;//29.1mW@200MHz @130nm - // From Power Analysis of SystemLevel OnChip Communication Architectures - // by Lahiri et - mc_power = - 4.32 * - 0.1; // 4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power / 1e9 / 72 / 1.1 / 1.1 * l_ip.F_sz_um / 0.065; - power_t.readOp.dynamic = - C_MCB * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * - (mcp.dataBusWidth /*+mcp.addressBusWidth*/); // per access energy in - // memory controller - power_t.readOp.leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.gate_leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - - } else { - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - area.set_area(0.15 * mcp.dataBusWidth / 72.0 * (l_ip.F_sz_um / 0.065) * - (l_ip.F_sz_um / 0.065) * mcp.num_channels * 1e6); // um^2 - backend_dyn = - 0.9e-9 / 800e6 * mcp.clockRate / 12800 * mcp.peakDataTransferRate * - mcp.dataBusWidth / 72.0 * g_tp.peri_global.Vdd / 1.1 * - g_tp.peri_global.Vdd / 1.1 * - (l_ip.F_sz_nm / 65.0); // Average on DDR2/3 protocol controller and - // DDRC 1600/800A in Cadence ChipEstimate - // Scaling to technology and DIMM feature. 
The base IP support - // DDR3-1600(PC3 12800) - backend_gates = 50000 * mcp.dataBusWidth / - 64.0; // 50000 is from Cadence ChipEstimator - - power_t.readOp.dynamic = backend_dyn; - power_t.readOp.leakage = - (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.gate_leakage = - (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - } - } else { // skip old model - cout << "Unknown memory controllers" << endl; - exit(0); - area.set_area(0.243 * mcp.dataBusWidth / - 8); // area based on Cadence ChipEstimator for 8bit bus - // mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for - // backend - C_MCB = mc_power / 1e9 / 72 / 1.1 / 1.1 * l_ip.F_sz_um / 0.065; - power_t.readOp.leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.gate_leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.dynamic *= 1.2; - power_t.readOp.leakage *= 1.2; - power_t.readOp.gate_leakage *= 1.2; - // flash controller has about 20% more backend power since BCH ECC in flash - // is complex and power hungry - } - double long_channel_device_reduction = - longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = - power_t.readOp.leakage * long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = - power_t.readOp.power_gated_leakage * long_channel_device_reduction; -} - -void MCBackend::computeEnergy(bool is_tdp) { - // backend uses 
internal data buswidth - if (is_tdp) { - // init stats for Peak - stats_t.readAc.access = 0.5 * mcp.num_channels; - stats_t.writeAc.access = 0.5 * mcp.num_channels; - tdp_stats = stats_t; - } else { - // init stats for runtime power (RTP) - stats_t.readAc.access = mcp.reads; - stats_t.writeAc.access = mcp.writes; - tdp_stats = stats_t; - } - if (is_tdp) { - power = power_t; - power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access) * - power_t.readOp.dynamic; - - } else { - rt_power.readOp.dynamic = (stats_t.readAc.access + stats_t.writeAc.access) * - mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * - power_t.readOp.dynamic; - rt_power = rt_power + power_t * pppm_lkg; - rt_power.readOp.dynamic = - rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * - mcp.num_mcs * mcp.executionTime; - // Assume 10% of peak power is consumed by routine job including memory - // refreshing and scrubbing - } -} -#endif - -#if 0 -MCPHY::MCPHY(InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_) - : l_ip(*interface_ip_), mc_type(mc_type_), mcp(mcp_) { - - local_result = init_interface(&l_ip); - compute(); -} - -void MCPHY::compute() { - // PHY uses internal data buswidth but the actuall off-chip datawidth is - // 64bits + ecc - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - /* - * according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation - * memory interfaces ," ISSCC 2006; From Cadence ChipEstimator for normal I/O - * around 0.4~0.8 mW/Gb/s - */ - double power_per_gb_per_s, phy_dyn, phy_gates, NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) { - if (mcp.type == 0) { - power_per_gb_per_s = mcp.LVDS ? 0.01 : 0.04; - // Based on die photos from Niagara 1 and 2. - // TODO merge this into undifferentiated core.PHY only achieves square - // root of the ideal scaling. 
area = - // (6.4323*log(peakDataTransferRate)-34.76)*memDataWidth/128.0*(l_ip.F_sz_um/0.09); - area.set_area((6.4323 * log(mcp.peakDataTransferRate * 2) - 48.134) * - mcp.dataBusWidth / 128.0 * (l_ip.F_sz_um / 0.09) * - mcp.num_channels * 1e6 / 2); // TODO:/2 - // This is from curve fitting based on Niagara 1 and 2's PHY die photo. - // This is power not energy, 10mw/Gb/s @90nm for each channel and scaling - // down power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change - // from Bytes to bits. - power_t.readOp.dynamic = power_per_gb_per_s * sqrt(l_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * - g_tp.peri_global.Vdd / 1.2; - power_t.readOp.leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.gate_leakage = - area.get_area() / 2 * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(g_tp.min_w_nmos_, - g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd; // unit W - - } else { - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - // Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm for upto - // DDR3 2133 (PC3 17066) - double non_IO_percentage = 0.2; - area.set_area(1.3 * non_IO_percentage / 2133.0e6 * mcp.clockRate / 17066 * - mcp.peakDataTransferRate * mcp.dataBusWidth / 16.0 * - (l_ip.F_sz_um / 0.040) * (l_ip.F_sz_um / 0.040) * - mcp.num_channels * 1e6); // um^2 - phy_gates = 200000 * mcp.dataBusWidth / 64.0; - power_per_gb_per_s = 0.01; - // This is power not energy, 10mw/Gb/s @90nm for each channel and scaling - // down - power_t.readOp.dynamic = power_per_gb_per_s * (l_ip.F_sz_um / 0.09) * - g_tp.peri_global.Vdd / 1.2 * - g_tp.peri_global.Vdd / 1.2; - power_t.readOp.leakage = - (mcp.withPHY ? 
phy_gates : 0) * - cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - power_t.readOp.gate_leakage = - (mcp.withPHY ? phy_gates : 0) * - cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * - g_tp.peri_global.Vdd; // unit W - } - - } else { - area.set_area(0.4e6 / 2 * mcp.dataBusWidth / - 8); // area based on Cadence ChipEstimator for 8bit bus - } - - // double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power - // numbers are based on 72 bit DIMM interface power_t.readOp.dynamic *= - // phy_factor; power_t.readOp.leakage *= phy_factor; - // power_t.readOp.gate_leakage *= phy_factor; - - double long_channel_device_reduction = - longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = - power_t.readOp.leakage * long_channel_device_reduction; - - double pg_reduction = power_gating_leakage_reduction(false); - power_t.readOp.power_gated_leakage = power_t.readOp.leakage * pg_reduction; - power_t.readOp.power_gated_with_long_channel_leakage = - power_t.readOp.power_gated_leakage * long_channel_device_reduction; -} - -void MCPHY::computeEnergy(bool is_tdp) { - if (is_tdp) { - // init stats for Peak - stats_t.readAc.access = 0.5 * mcp.num_channels; // time share on buses - stats_t.writeAc.access = 0.5 * mcp.num_channels; - tdp_stats = stats_t; - } else { - // init stats for runtime power (RTP) - stats_t.readAc.access = mcp.reads; - stats_t.writeAc.access = mcp.writes; - tdp_stats = stats_t; - } - - if (is_tdp) { - double data_transfer_unit = (mc_type == MC) ? 
72 : 16; /*DIMM data width*/ - power = power_t; - power.readOp.dynamic = - power.readOp.dynamic * - (mcp.peakDataTransferRate * 8 * 1e6 / 1e9 /*change to Gbs*/) * - mcp.dataBusWidth / data_transfer_unit * mcp.num_channels / - mcp.clockRate; - // divide by clock rate is for match the final computation where *clock is - // used - //(stats_t.readAc.access*power_t.readOp.dynamic+ - // stats_t.writeAc.access*power_t.readOp.dynamic); - - } else { - rt_power = power_t; - // rt_power.readOp.dynamic = - // (stats_t.readAc.access*power_t.readOp.dynamic+ - // stats_t.writeAc.access*power_t.readOp.dynamic); - - rt_power.readOp.dynamic = power_t.readOp.dynamic * - (stats_t.readAc.access + stats_t.writeAc.access) * - (mcp.llcBlockSize) * 8 / 1e9 / mcp.executionTime * - (mcp.executionTime); - rt_power.readOp.dynamic = - rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * - mcp.num_mcs * mcp.executionTime; - } -} -#endif - -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, - InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_) - : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - mcp(mcp_), MC_arb(0), frontendBuffer(0), readBuffer(0), writeBuffer(0) { - /* All computations are for a single MC - * - */ - - int tag, data; - bool is_default = true; // indication for default setup - - /* MC frontend engine channels share the same engines but logically - * partitioned For all hardware inside MC. different channels do not share - * resources. - * TODO: add docodeing/mux stage to steer memory requests to different - * channels. 
- */ - - // memory request reorder buffer - tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; - data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW) / 8.0)); - interface_ip.cache_sz = data * XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = - new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area() + - frontendBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + frontendBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - - // selection and arbitration logic - interface_ip.assoc = - 1; // reset to prevent unnecessary warning messages when init_interface - MC_arb = new selection_logic(is_default, - XML->sys.mc.req_window_size_per_channel, - 1, - &interface_ip, - Uncore_device); - - // read buffers. 
- data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation - // //8 means converting bit to Byte - interface_ip.cache_sz = - data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - 0; // XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area() + - readBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + readBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - - // write buffer - data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation - // //8 means converting bit to Byte - interface_ip.cache_sz = - data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - 
interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area() + - writeBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + writeBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); -} - -void MCFrontEnd::computeEnergy(bool is_tdp) { - if (is_tdp) { - // init stats for Peak - frontendBuffer->stats_t.readAc.access = - frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = - readBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = - readBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = - writeBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = - writeBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; - - } else { - // init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * - mcp.dataBusWidth / 72; - // For each channel, each memory word need to check the address data to - // achieve best scheduling results. 
and this need to be done on all physical - // DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * - mcp.dataBusWidth / 72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / - mcp.dataBusWidth; // support key word first - readBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / - mcp.dataBusWidth; // support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; - } - - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - - // frontendBuffer->power_t.readOp.dynamic += - //(frontendBuffer->stats_t.readAc.access* - // (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ - // frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += - (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access) * - frontendBuffer->local_result.power.searchOp.dynamic + - frontendBuffer->stats_t.readAc.access * - frontendBuffer->local_result.power.readOp.dynamic + - frontendBuffer->stats_t.writeAc.access * - frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += - (readBuffer->stats_t.readAc.access * - readBuffer->local_result.power.readOp.dynamic + - readBuffer->stats_t.writeAc.access * - readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += - (writeBuffer->stats_t.readAc.access * - 
writeBuffer->local_result.power.readOp.dynamic + - writeBuffer->stats_t.writeAc.access * - writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) { - power = power + frontendBuffer->power_t + readBuffer->power_t + - writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + writeBuffer->local_result.power) * - pppm_lkg; - - } else { - rt_power = - rt_power + frontendBuffer->power_t + readBuffer->power_t + - writeBuffer->power_t + - (frontendBuffer->local_result.power + readBuffer->local_result.power + - writeBuffer->local_result.power) * - pppm_lkg; - rt_power.readOp.dynamic = - rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * - mcp.num_mcs * mcp.executionTime; - } -} - -void MCFrontEnd::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next - << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << frontendBuffer->power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
frontendBuffer->power.readOp - .power_gated_with_long_channel_leakage - : frontendBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << frontendBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - - cout << endl; - cout << indent_str << "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << readBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? readBuffer->power.readOp - .power_gated_with_long_channel_leakage - : readBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << readBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << writeBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
writeBuffer->power.readOp - .power_gated_with_long_channel_leakage - : writeBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << writeBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; - } else { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next - << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << frontendBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage - << " W" << endl; - cout << endl; - cout << indent_str << "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << readBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage - << " W" << endl; - cout << endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << writeBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage - << " W" << endl; - } -} - 
-MemoryController::MemoryController(ParseXML *XML_interface, - InputParameter *interface_ip_, - enum MemoryCtrl_type mc_type_) - : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - frontend(0), pipeLogic(0) { - /* All computations are for a single MC - * - */ - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - interface_ip.wt = Global; - set_mc_param(); - transecEngine.set_params(XML, mcp, &interface_ip, mc_type); - transecEngine.set_stats(mcp); - transecEngine.computeArea(); - transecEngine.computeStaticPower(); - frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); - area.set_area(area.get_area() + frontend->area.get_area()); - area.set_area(area.get_area() + transecEngine.area.get_area()); - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY.set_params(XML, mcp, &interface_ip, mc_type); - PHY.set_stats(mcp); - PHY.computeArea(); - PHY.computeStaticPower(); - area.set_area(area.get_area() + PHY.area.get_area()); - } -} - -void MemoryController::computeEnergy(bool is_tdp) { - frontend->computeEnergy(is_tdp); - transecEngine.computeDynamicPower(); - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY.computeDynamicPower(); - } - if (is_tdp) { - power = power + frontend->power + transecEngine.power; - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - power = power + PHY.power; - } - } else { - rt_power = rt_power + frontend->rt_power + transecEngine.rt_power; - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - rt_power = rt_power + PHY.rt_power; - } - } -} - -void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - if (enable) { - cout << "Memory Controller:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str - << "Peak 
Dynamic = " << power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str - << "Runtime Dynamic = " << rt_power.readOp.dynamic / mcp.executionTime - << " W" << endl; - cout << endl; - - cout << indent_str << "Front End Engine:" << endl; - cout << indent_str_next << "Area = " << frontend->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << frontend->power.readOp.dynamic * mcp.clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? frontend->power.readOp.longer_channel_leakage - : frontend->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
frontend->power.readOp.power_gated_with_long_channel_leakage - : frontend->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << frontend->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; - transecEngine.display(indent, true); - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY.display(indent, true); - } - } -} - -void MemoryController::set_mc_param() { - if (mc_type == MC) { - mcp.clockRate = XML->sys.mc.mc_clock * 2; // DDR double pumped - mcp.clockRate *= 1e6; - mcp.executionTime = - XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); - - mcp.llcBlockSize = int(ceil(XML->sys.mc.llc_line_length / 8.0)) + - XML->sys.mc.llc_line_length; // ecc overhead - mcp.dataBusWidth = - int(ceil(XML->sys.mc.databus_width / 8.0)) + XML->sys.mc.databus_width; - mcp.addressBusWidth = int( - ceil(XML->sys.mc.addressbus_width)); // XML->sys.physical_address_width; - mcp.opcodeW = 16; - mcp.num_mcs = XML->sys.mc.number_mcs; - mcp.num_channels = XML->sys.mc.memory_channels_per_mc; - mcp.reads = XML->sys.mc.memory_reads; - mcp.writes = XML->sys.mc.memory_writes; - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better - // numbers, Run the RTL code from OpenSparc. 
- mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; - mcp.memRank = XML->sys.mc.number_ranks; - //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers - // PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - // PHY.llcBlocksize=llcBlockSize; - mcp.frontend_duty_cycle = 0.5; // for max power, the actual off-chip links - // is bidirectional but time shared - mcp.LVDS = XML->sys.mc.LVDS; - mcp.type = XML->sys.mc.type; - mcp.withPHY = XML->sys.mc.withPHY; - - if (XML->sys.mc.vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.mc.vdd; - interface_ip.lop_Vdd = XML->sys.mc.vdd; - interface_ip.lstp_Vdd = XML->sys.mc.vdd; - } - if (XML->sys.mc.power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = XML->sys.mc.power_gating_vcc; - } - } - // else if (mc_type==FLASHC) - // { - // mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double - // pumped mcp.clockRate *= 1e6; mcp.executionTime - // = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - // - // mcp.llcBlockSize - //=int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc - // overhead mcp.dataBusWidth - // =int(ceil(XML->sys.flashc.databus_width/8.0)) + - // XML->sys.flashc.databus_width; mcp.addressBusWidth - //=int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; - // mcp.opcodeW =16; - // mcp.num_mcs = XML->sys.flashc.number_mcs; - // mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; - // mcp.reads = XML->sys.flashc.memory_reads; - // mcp.writes = XML->sys.flashc.memory_writes; - // //+++++++++Transaction engine +++++++++++++++++ ////TODO needs - // better numbers, Run the RTL code from OpenSparc. 
- // mcp.peakDataTransferRate = - // XML->sys.flashc.peak_transfer_rate; mcp.memRank = - // XML->sys.flashc.number_ranks; - // //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs - // better numbers - // //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max - // power - // //PHY.llcBlocksize=llcBlockSize; - // mcp.frontend_duty_cycle = 0.5;//for max power, the actual - // off-chip links is bidirectional but time shared mcp.LVDS = - // XML->sys.flashc.LVDS; mcp.type = XML->sys.flashc.type; - // } - else { - cout << "Unknown memory controller type: neither DRAM controller nor Flash " - "controller" - << endl; - exit(0); - } -} - -MCFrontEnd ::~MCFrontEnd() { - - if (MC_arb) { - delete MC_arb; - MC_arb = 0; - } - if (frontendBuffer) { - delete frontendBuffer; - frontendBuffer = 0; - } - if (readBuffer) { - delete readBuffer; - readBuffer = 0; - } - if (writeBuffer) { - delete writeBuffer; - writeBuffer = 0; - } -} - -MemoryController ::~MemoryController() { - if (frontend) { - delete frontend; - frontend = 0; - } - if (pipeLogic) { - delete pipeLogic; - pipeLogic = 0; - } -} diff --git a/src/memoryctrl/CMakeLists.txt b/src/memoryctrl/CMakeLists.txt index acae0ec..fd0c851 100644 --- a/src/memoryctrl/CMakeLists.txt +++ b/src/memoryctrl/CMakeLists.txt @@ -1,8 +1,12 @@ add_library(memoryctrl mc_backend.h mc_backend.cc + mc_frontend.h + mc_frontend.cc mc_phy.h mc_phy.cc + memoryctrl.h + memoryctrl.cc ) target_include_directories(memoryctrl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(memoryctrl LINK_PUBLIC cacti top) diff --git a/src/memoryctrl/mc_backend.cc b/src/memoryctrl/mc_backend.cc index 5d87df5..cd7b93b 100644 --- a/src/memoryctrl/mc_backend.cc +++ b/src/memoryctrl/mc_backend.cc @@ -28,7 +28,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” * ***************************************************************************/ - #include "mc_backend.h" #include "XML_Parse.h" @@ -46,7 +45,7 @@ #include /* - * MCBackend() 
+ * MCPHY() * Constructor, Initializes the member variables that are shared across * methods. */ diff --git a/src/memoryctrl/mc_frontend.cc b/src/memoryctrl/mc_frontend.cc new file mode 100644 index 0000000..0b63ad8 --- /dev/null +++ b/src/memoryctrl/mc_frontend.cc @@ -0,0 +1,399 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "mc_frontend.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_) + : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), + mcp(mcp_), MC_arb(0), frontendBuffer(0), readBuffer(0), writeBuffer(0) { + /* All computations are for a single MC + * + */ + + int tag, data; + bool is_default = true; // indication for default setup + + /* MC frontend engine channels share the same engines but logically + * partitioned For all hardware inside MC. different channels do not share + * resources. + * TODO: add docodeing/mux stage to steer memory requests to different + * channels. 
+ */ + + // memory request reorder buffer + tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; + data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW) / 8.0)); + interface_ip.cache_sz = data * XML->sys.mc.req_window_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; + frontendBuffer = + new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); + frontendBuffer->area.set_area(frontendBuffer->area.get_area() + + frontendBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + frontendBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + + // selection and arbitration logic + interface_ip.assoc = + 1; // reset to prevent unnecessary warning messages when init_interface + MC_arb = new selection_logic(is_default, + XML->sys.mc.req_window_size_per_channel, + 1, + &interface_ip, + Uncore_device); + + // read buffers. 
+ data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation + // //8 means converting bit to Byte + interface_ip.cache_sz = + data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; + interface_ip.line_sz = data; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 0; // XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; + interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); + readBuffer->area.set_area(readBuffer->area.get_area() + + readBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + readBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + + // write buffer + data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation + // //8 means converting bit to Byte + interface_ip.cache_sz = + data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; + interface_ip.line_sz = data; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + 
interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); + writeBuffer->area.set_area(writeBuffer->area.get_area() + + writeBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); + area.set_area(area.get_area() + writeBuffer->local_result.area * + XML->sys.mc.memory_channels_per_mc); +} + +void MCFrontEnd::computeEnergy(bool is_tdp) { + if (is_tdp) { + // init stats for Peak + frontendBuffer->stats_t.readAc.access = + frontendBuffer->l_ip.num_search_ports; + frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->tdp_stats = frontendBuffer->stats_t; + + readBuffer->stats_t.readAc.access = + readBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; + readBuffer->stats_t.writeAc.access = + readBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; + readBuffer->tdp_stats = readBuffer->stats_t; + + writeBuffer->stats_t.readAc.access = + writeBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; + writeBuffer->stats_t.writeAc.access = + writeBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; + writeBuffer->tdp_stats = writeBuffer->stats_t; + + } else { + // init stats for runtime power (RTP) + frontendBuffer->stats_t.readAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * + mcp.dataBusWidth / 72; + // For each channel, each memory word need to check the address data to + // achieve best scheduling results. 
and this need to be done on all physical + // DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 + frontendBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * + mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats = frontendBuffer->stats_t; + + readBuffer->stats_t.readAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer->rtp_stats = readBuffer->stats_t; + + writeBuffer->stats_t.readAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer->stats_t.writeAc.access = + XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer->rtp_stats = writeBuffer->stats_t; + } + + frontendBuffer->power_t.reset(); + readBuffer->power_t.reset(); + writeBuffer->power_t.reset(); + + // frontendBuffer->power_t.readOp.dynamic += + //(frontendBuffer->stats_t.readAc.access* + // (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ + // frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); + + frontendBuffer->power_t.readOp.dynamic += + (frontendBuffer->stats_t.readAc.access + + frontendBuffer->stats_t.writeAc.access) * + frontendBuffer->local_result.power.searchOp.dynamic + + frontendBuffer->stats_t.readAc.access * + frontendBuffer->local_result.power.readOp.dynamic + + frontendBuffer->stats_t.writeAc.access * + frontendBuffer->local_result.power.writeOp.dynamic; + + readBuffer->power_t.readOp.dynamic += + (readBuffer->stats_t.readAc.access * + readBuffer->local_result.power.readOp.dynamic + + readBuffer->stats_t.writeAc.access * + readBuffer->local_result.power.writeOp.dynamic); + writeBuffer->power_t.readOp.dynamic += + (writeBuffer->stats_t.readAc.access * + 
writeBuffer->local_result.power.readOp.dynamic + + writeBuffer->stats_t.writeAc.access * + writeBuffer->local_result.power.writeOp.dynamic); + + if (is_tdp) { + power = power + frontendBuffer->power_t + readBuffer->power_t + + writeBuffer->power_t + + (frontendBuffer->local_result.power + + readBuffer->local_result.power + writeBuffer->local_result.power) * + pppm_lkg; + + } else { + rt_power = + rt_power + frontendBuffer->power_t + readBuffer->power_t + + writeBuffer->power_t + + (frontendBuffer->local_result.power + readBuffer->local_result.power + + writeBuffer->local_result.power) * + pppm_lkg; + rt_power.readOp.dynamic = + rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * + mcp.num_mcs * mcp.executionTime; + } +} + +void MCFrontEnd::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Front End ROB:" << endl; + cout << indent_str_next + << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << frontendBuffer->power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
frontendBuffer->power.readOp + .power_gated_with_long_channel_leakage + : frontendBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << frontendBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + + cout << endl; + cout << indent_str << "Read Buffer:" << endl; + cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << readBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? readBuffer->power.readOp + .power_gated_with_long_channel_leakage + : readBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << readBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "Write Buffer:" << endl; + cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << writeBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
writeBuffer->power.readOp + .power_gated_with_long_channel_leakage + : writeBuffer->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << writeBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + } else { + cout << indent_str << "Front End ROB:" << endl; + cout << indent_str_next + << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << frontendBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + cout << endl; + cout << indent_str << "Read Buffer:" << endl; + cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << readBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + cout << endl; + cout << indent_str << "Write Buffer:" << endl; + cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << writeBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str_next + << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage + << " W" << endl; + } +} + +MCFrontEnd ::~MCFrontEnd() { + + if 
(MC_arb) { + delete MC_arb; + MC_arb = 0; + } + if (frontendBuffer) { + delete frontendBuffer; + frontendBuffer = 0; + } + if (readBuffer) { + delete readBuffer; + readBuffer = 0; + } + if (writeBuffer) { + delete writeBuffer; + writeBuffer = 0; + } +} diff --git a/src/memoryctrl/mc_frontend.h b/src/memoryctrl/mc_frontend.h new file mode 100644 index 0000000..87a31df --- /dev/null +++ b/src/memoryctrl/mc_frontend.h @@ -0,0 +1,63 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __MC_FRONTEND_H__ +#define __MC_FRONTEND_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "logic.h" +#include "parameter.h" + +#include + +class MCFrontEnd : public Component { +public: + ParseXML *XML; + InputParameter interface_ip; + enum MemoryCtrl_type mc_type; + MCParam mcp; + selection_logic *MC_arb; + ArrayST *frontendBuffer; + ArrayST *readBuffer; + ArrayST *writeBuffer; + + MCFrontEnd(ParseXML *XML_interface, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MCFrontEnd(); +}; + +#endif // __MC_FRONTEND_H__ diff --git a/src/memoryctrl/memoryctrl.cc b/src/memoryctrl/memoryctrl.cc new file mode 100644 index 0000000..be90392 --- /dev/null +++ b/src/memoryctrl/memoryctrl.cc @@ -0,0 +1,274 @@ +/***************************************************************************** + * McPAT * SOFTWARE LICENSE + AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ +#include "memoryctrl.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "basic_components.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +/* overview of MC models: + * McPAT memory controllers are modeled according to large number of industrial + * data points. 
The Basic memory controller architecture is based on the Synopsys + * designs (DesignWare DDR2/DDR3-Lite memory controllers and DDR2/DDR3-Lite + * protocol controllers) as in Cadence ChipEstimator Tool + * + * An MC has 3 parts as shown in this design. McPAT models both high performance + * MC based on Niagara processor designs and curving and low power MC based on + * data points in Cadence ChipEstimator Tool. + * + * The frontend is modeled analytically, the backend is modeled empirically + * according to DDR2/DDR3-Lite protocol controllers in Cadence ChipEstimator + * Tool. The PHY is modeled based on "A 100mW 9.6Gb/s Transceiver in 90nm CMOS + * for next-generation memory interfaces," ISSCC 2006, and "A 14mW 6.25Gb/s + * Transceiver in 90nm CMOS for Serial Chip-to-Chip Communication," ISSCC 2007 + * + * In Cadence ChipEstimator Tool there are two types of memory controllers: the + * full memory controllers that include the frontend as the DesignWare + * DDR2/DDR3-Lite memory controllers and the backend only memory controllers as + * the DDR2/DDR3-Lite protocol controllers (except DesignWare DDR2/DDR3-Lite + * memory controllers, all memory controller IP in Cadence ChipEstimator Tool + * are backend memory controllers such as DDRC 1600A and DDRC 800A).
Thus,to + * some extend the area and power difference between DesignWare DDR2/DDR3-Lite + * memory controllers and DDR2/DDR3-Lite protocol controllers can be an + * estimation to the frontend power and area, which is very close the + * analitically modeled results of the frontend for Niagara2@65nm + * + */ + +MemoryController::MemoryController(ParseXML *XML_interface, + InputParameter *interface_ip_, + enum MemoryCtrl_type mc_type_) + : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), + frontend(0), pipeLogic(0) { + /* All computations are for a single MC + * + */ + interface_ip.wire_is_mat_type = 2; + interface_ip.wire_os_mat_type = 2; + interface_ip.wt = Global; + set_mc_param(); + transecEngine.set_params(XML, mcp, &interface_ip, mc_type); + transecEngine.set_stats(mcp); + transecEngine.computeArea(); + transecEngine.computeStaticPower(); + frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); + area.set_area(area.get_area() + frontend->area.get_area()); + area.set_area(area.get_area() + transecEngine.area.get_area()); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.set_params(XML, mcp, &interface_ip, mc_type); + PHY.set_stats(mcp); + PHY.computeArea(); + PHY.computeStaticPower(); + area.set_area(area.get_area() + PHY.area.get_area()); + } +} + +void MemoryController::computeEnergy(bool is_tdp) { + frontend->computeEnergy(is_tdp); + transecEngine.computeDynamicPower(); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.computeDynamicPower(); + } + if (is_tdp) { + power = power + frontend->power + transecEngine.power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + power = power + PHY.power; + } + } else { + rt_power = rt_power + frontend->rt_power + transecEngine.rt_power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + rt_power = rt_power + PHY.rt_power; + } + } +} + +void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { + string indent_str(indent, ' '); + 
string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + if (enable) { + cout << "Memory Controller:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate << " W" + << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic / mcp.executionTime + << " W" << endl; + cout << endl; + + cout << indent_str << "Front End Engine:" << endl; + cout << indent_str_next << "Area = " << frontend->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << frontend->power.readOp.dynamic * mcp.clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? frontend->power.readOp.longer_channel_leakage + : frontend->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
frontend->power.readOp.power_gated_with_long_channel_leakage + : frontend->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << frontend->rt_power.readOp.dynamic / mcp.executionTime << " W" + << endl; + cout << endl; + transecEngine.display(indent, true); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.display(indent, true); + } + } +} + +void MemoryController::set_mc_param() { + if (mc_type == MC) { + mcp.clockRate = XML->sys.mc.mc_clock * 2; // DDR double pumped + mcp.clockRate *= 1e6; + mcp.executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + + mcp.llcBlockSize = int(ceil(XML->sys.mc.llc_line_length / 8.0)) + + XML->sys.mc.llc_line_length; // ecc overhead + mcp.dataBusWidth = + int(ceil(XML->sys.mc.databus_width / 8.0)) + XML->sys.mc.databus_width; + mcp.addressBusWidth = int( + ceil(XML->sys.mc.addressbus_width)); // XML->sys.physical_address_width; + mcp.opcodeW = 16; + mcp.num_mcs = XML->sys.mc.number_mcs; + mcp.num_channels = XML->sys.mc.memory_channels_per_mc; + mcp.reads = XML->sys.mc.memory_reads; + mcp.writes = XML->sys.mc.memory_writes; + //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better + // numbers, Run the RTL code from OpenSparc. 
+ mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; + mcp.memRank = XML->sys.mc.number_ranks; + //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers + // PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power + // PHY.llcBlocksize=llcBlockSize; + mcp.frontend_duty_cycle = 0.5; // for max power, the actual off-chip links + // is bidirectional but time shared + mcp.LVDS = XML->sys.mc.LVDS; + mcp.type = XML->sys.mc.type; + mcp.withPHY = XML->sys.mc.withPHY; + + if (XML->sys.mc.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.mc.vdd; + interface_ip.lop_Vdd = XML->sys.mc.vdd; + interface_ip.lstp_Vdd = XML->sys.mc.vdd; + } + if (XML->sys.mc.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = XML->sys.mc.power_gating_vcc; + } + } + // else if (mc_type==FLASHC) + // { + // mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double + // pumped mcp.clockRate *= 1e6; mcp.executionTime + // = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); + // + // mcp.llcBlockSize + //=int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc + // overhead mcp.dataBusWidth + // =int(ceil(XML->sys.flashc.databus_width/8.0)) + + // XML->sys.flashc.databus_width; mcp.addressBusWidth + //=int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; + // mcp.opcodeW =16; + // mcp.num_mcs = XML->sys.flashc.number_mcs; + // mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; + // mcp.reads = XML->sys.flashc.memory_reads; + // mcp.writes = XML->sys.flashc.memory_writes; + // //+++++++++Transaction engine +++++++++++++++++ ////TODO needs + // better numbers, Run the RTL code from OpenSparc. 
+ // mcp.peakDataTransferRate = + // XML->sys.flashc.peak_transfer_rate; mcp.memRank = + // XML->sys.flashc.number_ranks; + // //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs + // better numbers + // //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max + // power + // //PHY.llcBlocksize=llcBlockSize; + // mcp.frontend_duty_cycle = 0.5;//for max power, the actual + // off-chip links is bidirectional but time shared mcp.LVDS = + // XML->sys.flashc.LVDS; mcp.type = XML->sys.flashc.type; + // } + else { + cout << "Unknown memory controller type: neither DRAM controller nor Flash " + "controller" + << endl; + exit(0); + } +} + +MemoryController ::~MemoryController() { + if (frontend) { + delete frontend; + frontend = 0; + } + if (pipeLogic) { + delete pipeLogic; + pipeLogic = 0; + } +} diff --git a/src/memoryctrl.h b/src/memoryctrl/memoryctrl.h similarity index 61% rename from src/memoryctrl.h rename to src/memoryctrl/memoryctrl.h index 39892cd..cea2052 100644 --- a/src/memoryctrl.h +++ b/src/memoryctrl/memoryctrl.h @@ -29,81 +29,20 @@ * ***************************************************************************/ -#ifndef MEMORYCTRL_H_ -#define MEMORYCTRL_H_ +#ifndef __MEMORYCTRL_H__ +#define __MEMORYCTRL_H__ #include "XML_Parse.h" #include "array.h" #include "basic_components.h" #include "logic.h" #include "mc_backend.h" +#include "mc_frontend.h" #include "mc_phy.h" #include "parameter.h" #include -#if 0 -class MCBackend : public Component { -public: - InputParameter l_ip; - uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCBackend(InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~MCBackend(){}; -}; -#endif - -#if 0 -class MCPHY : public Component { -public: - 
InputParameter l_ip; - uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCPHY(InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~MCPHY(){}; -}; -#endif - -class MCFrontEnd : public Component { -public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - selection_logic *MC_arb; - ArrayST *frontendBuffer; - ArrayST *readBuffer; - ArrayST *writeBuffer; - - MCFrontEnd(ParseXML *XML_interface, - InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~MCFrontEnd(); -}; - class MemoryController : public Component { public: ParseXML *XML; @@ -124,4 +63,4 @@ class MemoryController : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool enable = true); ~MemoryController(); }; -#endif /* MEMORYCTRL_H_ */ +#endif /* __MEMORYCTRL_H__ */ From ee8d1ea2975c48fbce5bef3db8e14412118944f7 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 10 Jun 2020 23:30:22 -0500 Subject: [PATCH 15/59] refactor-serialization: Refactored MC Front End Refactored the Memory Controller Front End Module to separate the Area calculations from the Power Calculations --- src/memoryctrl/mc_frontend.cc | 692 +++++++++++++++++++--------------- src/memoryctrl/mc_frontend.h | 49 ++- src/memoryctrl/memoryctrl.cc | 47 +-- src/memoryctrl/memoryctrl.h | 2 +- 4 files changed, 428 insertions(+), 362 deletions(-) diff --git a/src/memoryctrl/mc_frontend.cc b/src/memoryctrl/mc_frontend.cc index 0b63ad8..0c0ab51 100644 --- a/src/memoryctrl/mc_frontend.cc +++ b/src/memoryctrl/mc_frontend.cc @@ -45,15 +45,34 @@ 
#include #include -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, - InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_) - : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - mcp(mcp_), MC_arb(0), frontendBuffer(0), readBuffer(0), writeBuffer(0) { - /* All computations are for a single MC - * - */ +MCFrontEnd::MCFrontEnd() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; + memory_channels_per_mc = 0; + physical_address_width = 0; + req_window_size_per_channel = 0; + IO_buffer_size_per_channel = 0; + memory_reads = 0; + memory_writes = 0; +} + +void MCFrontEnd::set_params(const ParseXML *XML, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_) { + fe_ip = *interface_ip_; + rb_ip = *interface_ip_; + wb_ip = *interface_ip_; + mcp = mcp_; + mc_type = mc_type_; + memory_channels_per_mc = XML->sys.mc.memory_channels_per_mc; + physical_address_width = XML->sys.physical_address_width; + req_window_size_per_channel = XML->sys.mc.req_window_size_per_channel; + IO_buffer_size_per_channel = XML->sys.mc.IO_buffer_size_per_channel; + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; int tag, data; bool is_default = true; // indication for default setup @@ -67,333 +86,378 @@ MCFrontEnd::MCFrontEnd(ParseXML *XML_interface, // memory request reorder buffer tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; - data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW) / 8.0)); - interface_ip.cache_sz = data * XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.is_cache = true; - 
interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = - new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area() + - frontendBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + frontendBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); + data = int(ceil((physical_address_width + mcp.opcodeW) / 8.0)); + fe_ip.cache_sz = data * req_window_size_per_channel; + fe_ip.line_sz = data; + fe_ip.assoc = 0; + fe_ip.nbanks = 1; + fe_ip.out_w = fe_ip.line_sz * 8; + fe_ip.specific_tag = 1; + fe_ip.tag_w = tag; + fe_ip.access_mode = 0; + fe_ip.throughput = 1.0 / mcp.clockRate; + fe_ip.latency = 1.0 / mcp.clockRate; + fe_ip.is_cache = true; + fe_ip.pure_cam = false; + fe_ip.pure_ram = false; + fe_ip.obj_func_dyn_energy = 0; + fe_ip.obj_func_dyn_power = 0; + fe_ip.obj_func_leak_power = 0; + fe_ip.obj_func_cycle_t = 1; + fe_ip.num_rw_ports = 0; + fe_ip.num_rd_ports = memory_channels_per_mc; + fe_ip.num_wr_ports = fe_ip.num_rd_ports; + fe_ip.num_se_rd_ports = 0; + fe_ip.num_search_ports = memory_channels_per_mc; + frontendBuffer.set_params(&fe_ip, "MC ReorderBuffer", Uncore_device); // selection and arbitration logic - interface_ip.assoc = + fe_ip.assoc = 1; // reset to prevent unnecessary warning messages when init_interface - MC_arb = new selection_logic(is_default, - XML->sys.mc.req_window_size_per_channel, - 1, - &interface_ip, - Uncore_device); + MC_arb = new selection_logic( + is_default, 
req_window_size_per_channel, 1, &rb_ip, Uncore_device); + + // TODO: Verify whether or not this is a bug, Originally these were all + // the same interface_ip which leads to slight differences in power if + // removed. + rb_ip = fe_ip; // read buffers. data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation // //8 means converting bit to Byte - interface_ip.cache_sz = - data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - 0; // XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area() + - readBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + readBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); + rb_ip.cache_sz = data * IO_buffer_size_per_channel; //*llcBlockSize; + rb_ip.line_sz = data; + rb_ip.assoc = 1; + rb_ip.nbanks = 1; + rb_ip.out_w = rb_ip.line_sz * 8; + rb_ip.access_mode = 1; + rb_ip.throughput = 1.0 / mcp.clockRate; + rb_ip.latency = 1.0 / mcp.clockRate; + rb_ip.is_cache = false; + rb_ip.pure_cam = false; + rb_ip.pure_ram = true; + rb_ip.obj_func_dyn_energy = 0; + rb_ip.obj_func_dyn_power = 0; + 
rb_ip.obj_func_leak_power = 0; + rb_ip.obj_func_cycle_t = 1; + rb_ip.num_rw_ports = + 0; // memory_channels_per_mc*2>2?2:memory_channels_per_mc*2; + rb_ip.num_rd_ports = memory_channels_per_mc; + rb_ip.num_wr_ports = rb_ip.num_rd_ports; + rb_ip.num_se_rd_ports = 0; + readBuffer.set_params(&rb_ip, "MC ReadBuffer", Uncore_device); + + // TODO: Verify whether or not this is a bug, Originally these were all + // the same interface_ip which leads to slight differences in power if + // removed. + wb_ip = rb_ip; // write buffer data = (int)ceil(mcp.dataBusWidth / 8.0); // Support key words first operation // //8 means converting bit to Byte - interface_ip.cache_sz = - data * XML->sys.mc.IO_buffer_size_per_channel; //*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / mcp.clockRate; - interface_ip.latency = 1.0 / mcp.clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area() + - writeBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area() + writeBuffer->local_result.area * - XML->sys.mc.memory_channels_per_mc); + wb_ip.cache_sz = data * IO_buffer_size_per_channel; //*llcBlockSize; + wb_ip.line_sz = data; + wb_ip.assoc = 1; + wb_ip.nbanks = 1; + wb_ip.out_w = wb_ip.line_sz * 8; + wb_ip.access_mode = 0; + wb_ip.throughput = 1.0 / mcp.clockRate; + wb_ip.latency = 1.0 / mcp.clockRate; + wb_ip.obj_func_dyn_energy = 0; + wb_ip.obj_func_dyn_power = 0; 
+ wb_ip.obj_func_leak_power = 0; + wb_ip.obj_func_cycle_t = 1; + wb_ip.num_rw_ports = 0; + wb_ip.num_rd_ports = memory_channels_per_mc; + wb_ip.num_wr_ports = wb_ip.num_rd_ports; + wb_ip.num_se_rd_ports = 0; + writeBuffer.set_params(&wb_ip, "MC writeBuffer", Uncore_device); + init_params = true; } -void MCFrontEnd::computeEnergy(bool is_tdp) { - if (is_tdp) { - // init stats for Peak - frontendBuffer->stats_t.readAc.access = - frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = - readBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = - readBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = - writeBuffer->l_ip.num_rd_ports * mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = - writeBuffer->l_ip.num_wr_ports * mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; - - } else { - // init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * - mcp.dataBusWidth / 72; - // For each channel, each memory word need to check the address data to - // achieve best scheduling results. 
and this need to be done on all physical - // DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth * - mcp.dataBusWidth / 72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / - mcp.dataBusWidth; // support key word first - readBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_reads * mcp.llcBlockSize * 8.0 / - mcp.dataBusWidth; // support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = - XML->sys.mc.memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; +void MCFrontEnd::set_stats(const ParseXML *XML, const MCParam &mcp_) { + memory_reads = XML->sys.mc.memory_reads; + memory_writes = XML->sys.mc.memory_writes; + mcp = mcp_; + init_stats = true; +} + +void MCFrontEnd::computeArea() { + if (!init_params) { + std::cerr << "[ MCFrontEnd ] Error: must set params before calling " + "computeArea()\n"; + exit(1); } + // Front End Buffer Area Calculation + frontendBuffer.computeArea(); + frontendBuffer.area.set_area(frontendBuffer.area.get_area() + + frontendBuffer.local_result.area * + memory_channels_per_mc); + area.set_area(area.get_area() + + frontendBuffer.local_result.area * memory_channels_per_mc); + + // Read Buffer Area Calculation + readBuffer.computeArea(); + readBuffer.area.set_area(readBuffer.area.get_area() + + readBuffer.local_result.area * + memory_channels_per_mc); + area.set_area(area.get_area() + + readBuffer.local_result.area * memory_channels_per_mc); + + // Write Buffer Area Calculation + writeBuffer.computeArea(); + writeBuffer.area.set_area(writeBuffer.area.get_area() + + writeBuffer.local_result.area * + 
memory_channels_per_mc); + area.set_area(area.get_area() + + writeBuffer.local_result.area * memory_channels_per_mc); +} - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - - // frontendBuffer->power_t.readOp.dynamic += - //(frontendBuffer->stats_t.readAc.access* - // (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ - // frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += - (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access) * - frontendBuffer->local_result.power.searchOp.dynamic + - frontendBuffer->stats_t.readAc.access * - frontendBuffer->local_result.power.readOp.dynamic + - frontendBuffer->stats_t.writeAc.access * - frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += - (readBuffer->stats_t.readAc.access * - readBuffer->local_result.power.readOp.dynamic + - readBuffer->stats_t.writeAc.access * - readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += - (writeBuffer->stats_t.readAc.access * - writeBuffer->local_result.power.readOp.dynamic + - writeBuffer->stats_t.writeAc.access * - writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) { - power = power + frontendBuffer->power_t + readBuffer->power_t + - writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + writeBuffer->local_result.power) * - pppm_lkg; - - } else { - rt_power = - rt_power + frontendBuffer->power_t + readBuffer->power_t + - writeBuffer->power_t + - (frontendBuffer->local_result.power + readBuffer->local_result.power + - writeBuffer->local_result.power) * - pppm_lkg; - rt_power.readOp.dynamic = - rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * - mcp.num_mcs * mcp.executionTime; +void MCFrontEnd::computeStaticPower() { + // NOTE: 
this does nothing, as the static power is optimized + // along with the array area. +} + +void MCFrontEnd::computeDynamicPower() { + if (!init_stats) { + std::cerr << "[ MCFrontEnd ] Error: must set params before calling " + "computeDynamicPower()\n"; + exit(1); } + // stats for peak power (TDP) + computeFrontEndTDP(); + computeReadBufferTDP(); + computeWriteBufferTDP(); + + power = frontendBuffer.power_t + readBuffer.power_t + writeBuffer.power_t + + (frontendBuffer.local_result.power + readBuffer.local_result.power + + writeBuffer.local_result.power) * + pppm_lkg; + + // stats for runtime power (RTP) + computeFrontEndRTP(); + computeReadBufferRTP(); + computeWriteBufferRTP(); + + rt_power = frontendBuffer.power_t + readBuffer.power_t + writeBuffer.power_t + + (frontendBuffer.local_result.power + + readBuffer.local_result.power + writeBuffer.local_result.power) * + pppm_lkg; + rt_power.readOp.dynamic = + rt_power.readOp.dynamic + power.readOp.dynamic * 0.1 * mcp.clockRate * + mcp.num_mcs * mcp.executionTime; +} + +void MCFrontEnd::computeFrontEndRTP() { + frontendBuffer.stats_t.readAc.access = memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth * mcp.dataBusWidth / + 72; + // For each channel, each memory word need to check the address data to + // achieve best scheduling results. 
and this need to be done on all physical + // DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 + frontendBuffer.stats_t.writeAc.access = memory_writes * mcp.llcBlockSize * + 8.0 / mcp.dataBusWidth * + mcp.dataBusWidth / 72; + frontendBuffer.rtp_stats = frontendBuffer.stats_t; + + frontendBuffer.power_t.readOp.dynamic = + (frontendBuffer.rtp_stats.readAc.access + + frontendBuffer.rtp_stats.writeAc.access) * + frontendBuffer.local_result.power.searchOp.dynamic + + frontendBuffer.rtp_stats.readAc.access * + frontendBuffer.local_result.power.readOp.dynamic + + frontendBuffer.rtp_stats.writeAc.access * + frontendBuffer.local_result.power.writeOp.dynamic; +} + +void MCFrontEnd::computeReadBufferRTP() { + readBuffer.stats_t.readAc.access = memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer.stats_t.writeAc.access = + memory_reads * mcp.llcBlockSize * 8.0 / + mcp.dataBusWidth; // support key word first + readBuffer.rtp_stats = readBuffer.stats_t; + + readBuffer.power_t.readOp.dynamic = + (readBuffer.rtp_stats.readAc.access * + readBuffer.local_result.power.readOp.dynamic + + readBuffer.rtp_stats.writeAc.access * + readBuffer.local_result.power.writeOp.dynamic); +} + +void MCFrontEnd::computeWriteBufferRTP() { + writeBuffer.stats_t.readAc.access = + memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer.stats_t.writeAc.access = + memory_writes * mcp.llcBlockSize * 8.0 / mcp.dataBusWidth; + writeBuffer.rtp_stats = writeBuffer.stats_t; + + writeBuffer.power_t.readOp.dynamic = + (writeBuffer.rtp_stats.readAc.access * + writeBuffer.local_result.power.readOp.dynamic + + writeBuffer.rtp_stats.writeAc.access * + writeBuffer.local_result.power.writeOp.dynamic); +} + +void MCFrontEnd::computeFrontEndTDP() { + frontendBuffer.stats_t.readAc.access = frontendBuffer.l_ip.num_search_ports; + frontendBuffer.stats_t.writeAc.access = frontendBuffer.l_ip.num_wr_ports; + frontendBuffer.tdp_stats = frontendBuffer.stats_t; + 
+ frontendBuffer.power_t.readOp.dynamic = + (frontendBuffer.tdp_stats.readAc.access + + frontendBuffer.tdp_stats.writeAc.access) * + frontendBuffer.local_result.power.searchOp.dynamic + + frontendBuffer.tdp_stats.readAc.access * + frontendBuffer.local_result.power.readOp.dynamic + + frontendBuffer.tdp_stats.writeAc.access * + frontendBuffer.local_result.power.writeOp.dynamic; +} + +void MCFrontEnd::computeReadBufferTDP() { + readBuffer.stats_t.readAc.access = + readBuffer.l_ip.num_rd_ports * mcp.frontend_duty_cycle; + readBuffer.stats_t.writeAc.access = + readBuffer.l_ip.num_wr_ports * mcp.frontend_duty_cycle; + readBuffer.tdp_stats = readBuffer.stats_t; + + readBuffer.power_t.readOp.dynamic = + (readBuffer.tdp_stats.readAc.access * + readBuffer.local_result.power.readOp.dynamic + + readBuffer.tdp_stats.writeAc.access * + readBuffer.local_result.power.writeOp.dynamic); } -void MCFrontEnd::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +void MCFrontEnd::computeWriteBufferTDP() { + writeBuffer.stats_t.readAc.access = + writeBuffer.l_ip.num_rd_ports * mcp.frontend_duty_cycle; + writeBuffer.stats_t.writeAc.access = + writeBuffer.l_ip.num_wr_ports * mcp.frontend_duty_cycle; + writeBuffer.tdp_stats = writeBuffer.stats_t; + + writeBuffer.power_t.readOp.dynamic = + (writeBuffer.tdp_stats.readAc.access * + writeBuffer.local_result.power.readOp.dynamic + + writeBuffer.tdp_stats.writeAc.access * + writeBuffer.local_result.power.writeOp.dynamic); +} + +void MCFrontEnd::display(uint32_t indent, bool enable, bool detailed) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next - << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << frontendBuffer->power.readOp.dynamic * mcp.clockRate 
<< " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? frontendBuffer->power.readOp - .power_gated_with_long_channel_leakage - : frontendBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << frontendBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - - cout << endl; - cout << indent_str << "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << readBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << readBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
readBuffer->power.readOp - .power_gated_with_long_channel_leakage - : readBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << readBuffer->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << readBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << writeBuffer->power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << writeBuffer->power.readOp.leakage - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? writeBuffer->power.readOp - .power_gated_with_long_channel_leakage - : writeBuffer->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << writeBuffer->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << writeBuffer->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; - } else { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next - << "Area = " << frontendBuffer->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << frontendBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << frontendBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontendBuffer->rt_power.readOp.gate_leakage - << " W" << endl; - cout << endl; - cout << indent_str << "Read Buffer:" << endl; - cout << indent_str_next << "Area = " << readBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << 
"Peak Dynamic = " - << readBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << readBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << readBuffer->rt_power.readOp.gate_leakage - << " W" << endl; - cout << endl; - cout << indent_str << "Write Buffer:" << endl; - cout << indent_str_next << "Area = " << writeBuffer->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << writeBuffer->rt_power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str_next - << "Subthreshold Leakage = " << writeBuffer->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << writeBuffer->rt_power.readOp.gate_leakage - << " W" << endl; + + if (enable) { + std::cout << indent_str << "Front End Engine:" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate + << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / mcp.executionTime << " W" + << std::endl; + std::cout << std::endl; + if (detailed) { + indent += 4; + indent_str = std::string(indent, ' '); + indent_str_next = std::string(indent + 2, ' '); + std::cout << indent_str << "Front End ROB:" << std::endl; + std::cout << indent_str_next + << "Area = " << frontendBuffer.area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str_next << "Peak Dynamic = " + << frontendBuffer.power.readOp.dynamic * mcp.clockRate << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << frontendBuffer.power.readOp.leakage << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
frontendBuffer.power.readOp + .power_gated_with_long_channel_leakage + : frontendBuffer.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << frontendBuffer.power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << frontendBuffer.rt_power.readOp.dynamic / mcp.executionTime + << " W" << std::endl; + + std::cout << std::endl; + std::cout << indent_str << "Read Buffer:" << std::endl; + std::cout << indent_str_next + << "Area = " << readBuffer.area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str_next << "Peak Dynamic = " + << readBuffer.power.readOp.dynamic * mcp.clockRate << " W" + << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage = " << readBuffer.power.readOp.leakage + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? readBuffer.power.readOp + .power_gated_with_long_channel_leakage + : readBuffer.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << readBuffer.power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << readBuffer.rt_power.readOp.dynamic / mcp.executionTime + << " W" << std::endl; + std::cout << std::endl; + std::cout << indent_str << "Write Buffer:" << std::endl; + std::cout << indent_str_next + << "Area = " << writeBuffer.area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str_next << "Peak Dynamic = " + << writeBuffer.power.readOp.dynamic * mcp.clockRate << " W" + << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage = " << writeBuffer.power.readOp.leakage + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
writeBuffer.power.readOp + .power_gated_with_long_channel_leakage + : writeBuffer.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << writeBuffer.power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << writeBuffer.rt_power.readOp.dynamic / mcp.executionTime + << " W" << std::endl; + std::cout << std::endl; + } } } MCFrontEnd ::~MCFrontEnd() { - - if (MC_arb) { - delete MC_arb; - MC_arb = 0; - } - if (frontendBuffer) { - delete frontendBuffer; - frontendBuffer = 0; - } - if (readBuffer) { - delete readBuffer; - readBuffer = 0; - } - if (writeBuffer) { - delete writeBuffer; - writeBuffer = 0; - } + // Do Nothing } diff --git a/src/memoryctrl/mc_frontend.h b/src/memoryctrl/mc_frontend.h index 87a31df..43aa3f0 100644 --- a/src/memoryctrl/mc_frontend.h +++ b/src/memoryctrl/mc_frontend.h @@ -42,22 +42,47 @@ class MCFrontEnd : public Component { public: - ParseXML *XML; - InputParameter interface_ip; + InputParameter sl_ip; + InputParameter fe_ip; + InputParameter rb_ip; + InputParameter wb_ip; enum MemoryCtrl_type mc_type; MCParam mcp; selection_logic *MC_arb; - ArrayST *frontendBuffer; - ArrayST *readBuffer; - ArrayST *writeBuffer; - - MCFrontEnd(ParseXML *XML_interface, - InputParameter *interface_ip_, - const MCParam &mcp_, - enum MemoryCtrl_type mc_type_); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ArrayST frontendBuffer; + ArrayST readBuffer; + ArrayST writeBuffer; + + MCFrontEnd(); + void set_params(const ParseXML *XML, + InputParameter *interface_ip_, + const MCParam &mcp_, + enum MemoryCtrl_type mc_type_); + void set_stats(const ParseXML *XML, const MCParam &mcp_); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true, bool detailed = false); ~MCFrontEnd(); + +private: + bool 
long_channel; + bool power_gating; + bool init_params; + bool init_stats; + int memory_channels_per_mc; + int physical_address_width; + int req_window_size_per_channel; + int IO_buffer_size_per_channel; + int memory_reads; + int memory_writes; + + void computeFrontEndRTP(); + void computeReadBufferRTP(); + void computeWriteBufferRTP(); + void computeFrontEndTDP(); + void computeReadBufferTDP(); + void computeWriteBufferTDP(); }; #endif // __MC_FRONTEND_H__ diff --git a/src/memoryctrl/memoryctrl.cc b/src/memoryctrl/memoryctrl.cc index be90392..b456b47 100644 --- a/src/memoryctrl/memoryctrl.cc +++ b/src/memoryctrl/memoryctrl.cc @@ -77,7 +77,7 @@ MemoryController::MemoryController(ParseXML *XML_interface, InputParameter *interface_ip_, enum MemoryCtrl_type mc_type_) : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - frontend(0), pipeLogic(0) { + pipeLogic(0) { /* All computations are for a single MC * */ @@ -89,8 +89,11 @@ MemoryController::MemoryController(ParseXML *XML_interface, transecEngine.set_stats(mcp); transecEngine.computeArea(); transecEngine.computeStaticPower(); - frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); - area.set_area(area.get_area() + frontend->area.get_area()); + frontend.set_params(XML, &interface_ip, mcp, mc_type); + frontend.set_stats(XML, mcp); + frontend.computeArea(); + frontend.computeStaticPower(); + area.set_area(area.get_area() + frontend.area.get_area()); area.set_area(area.get_area() + transecEngine.area.get_area()); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY.set_params(XML, mcp, &interface_ip, mc_type); @@ -102,18 +105,18 @@ MemoryController::MemoryController(ParseXML *XML_interface, } void MemoryController::computeEnergy(bool is_tdp) { - frontend->computeEnergy(is_tdp); + frontend.computeDynamicPower(); transecEngine.computeDynamicPower(); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY.computeDynamicPower(); } if (is_tdp) { - power = power + frontend->power + 
transecEngine.power; + power = power + frontend.power + transecEngine.power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { power = power + PHY.power; } } else { - rt_power = rt_power + frontend->rt_power + transecEngine.rt_power; + rt_power = rt_power + frontend.rt_power + transecEngine.rt_power; if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { rt_power = rt_power + PHY.rt_power; } @@ -136,11 +139,12 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { << (long_channel ? power.readOp.longer_channel_leakage : power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str << "Subthreshold Leakage with power gating = " << (long_channel ? power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; cout << indent_str @@ -148,30 +152,7 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { << " W" << endl; cout << endl; - cout << indent_str << "Front End Engine:" << endl; - cout << indent_str_next << "Area = " << frontend->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << frontend->power.readOp.dynamic * mcp.clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? frontend->power.readOp.longer_channel_leakage - : frontend->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
frontend->power.readOp.power_gated_with_long_channel_leakage - : frontend->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << frontend->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << frontend->rt_power.readOp.dynamic / mcp.executionTime << " W" - << endl; - cout << endl; + frontend.display(indent, true); transecEngine.display(indent, true); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY.display(indent, true); @@ -263,10 +244,6 @@ void MemoryController::set_mc_param() { } MemoryController ::~MemoryController() { - if (frontend) { - delete frontend; - frontend = 0; - } if (pipeLogic) { delete pipeLogic; pipeLogic = 0; diff --git a/src/memoryctrl/memoryctrl.h b/src/memoryctrl/memoryctrl.h index cea2052..d13be52 100644 --- a/src/memoryctrl/memoryctrl.h +++ b/src/memoryctrl/memoryctrl.h @@ -49,7 +49,7 @@ class MemoryController : public Component { InputParameter interface_ip; enum MemoryCtrl_type mc_type; MCParam mcp; - MCFrontEnd *frontend; + MCFrontEnd frontend; MCBackend transecEngine; MCPHY PHY; Pipeline *pipeLogic; From 1a091faea32887c2ee24213970cacb9dcb0c5f04 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 11 Jun 2020 00:09:50 -0500 Subject: [PATCH 16/59] refactor-serialization: Refactored Memory Controller Refactored the Memory Controller Module to separate the Area calculations from the Power Calculations --- src/memoryctrl/mc_phy.cc | 4 +- src/memoryctrl/memoryctrl.cc | 164 ++++++++++++++++++++++------------- src/memoryctrl/memoryctrl.h | 26 ++++-- src/processor.cc | 30 +++---- src/processor.h | 2 +- 5 files changed, 140 insertions(+), 86 deletions(-) diff --git a/src/memoryctrl/mc_phy.cc b/src/memoryctrl/mc_phy.cc index 52f73f5..c858e3d 100644 --- a/src/memoryctrl/mc_phy.cc +++ b/src/memoryctrl/mc_phy.cc @@ -250,7 +250,7 @@ void MCPHY::computeDynamicPower() { * const MCParam&, * InputParameter, * const enum MemoryCtrl_type) - * Sets the parts 
of the flash controller params that contribute to area and + * Sets the parts of the MCPHY params that contribute to area and * static power. Must be called before computing area or static power. * Side Effects: * sets the interface_ip struct, and sets the params struct to the @@ -277,7 +277,7 @@ void MCPHY::set_params(const ParseXML *XML, /* * set_stats(const MCParam&) - * Sets the parts of the flash controller params that contribute to dynamic + * Sets the parts of the MCPHY params that contribute to dynamic * power. * Side Effects: * Store duty cycle and and percentage load into fc params, sets diff --git a/src/memoryctrl/memoryctrl.cc b/src/memoryctrl/memoryctrl.cc index b456b47..0b01632 100644 --- a/src/memoryctrl/memoryctrl.cc +++ b/src/memoryctrl/memoryctrl.cc @@ -73,84 +73,132 @@ * */ -MemoryController::MemoryController(ParseXML *XML_interface, - InputParameter *interface_ip_, - enum MemoryCtrl_type mc_type_) - : XML(XML_interface), interface_ip(*interface_ip_), mc_type(mc_type_), - pipeLogic(0) { - /* All computations are for a single MC - * - */ +MemoryController::MemoryController() { + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; + set_area = false; + mc_type = MC; +} + +void MemoryController::set_params(const ParseXML *XML, + InputParameter *interface_ip_, + enum MemoryCtrl_type mc_type_) { + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + interface_ip = *interface_ip_; + mc_type = mc_type_; + interface_ip.wire_is_mat_type = 2; interface_ip.wire_os_mat_type = 2; interface_ip.wt = Global; - set_mc_param(); + + set_mc_param(XML); + transecEngine.set_params(XML, mcp, &interface_ip, mc_type); - transecEngine.set_stats(mcp); - transecEngine.computeArea(); - transecEngine.computeStaticPower(); frontend.set_params(XML, &interface_ip, mcp, mc_type); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.set_params(XML, mcp, &interface_ip, mc_type); + } + init_params = 
true; +} + +void MemoryController::set_stats(const ParseXML *XML) { + set_mc_param(XML); + transecEngine.set_stats(mcp); frontend.set_stats(XML, mcp); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.set_stats(mcp); + } + init_stats = true; +} + +void MemoryController::computeArea() { + if (!init_params) { + std::cerr << "[ MemoryController ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + transecEngine.computeArea(); frontend.computeArea(); - frontend.computeStaticPower(); area.set_area(area.get_area() + frontend.area.get_area()); area.set_area(area.get_area() + transecEngine.area.get_area()); if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - PHY.set_params(XML, mcp, &interface_ip, mc_type); - PHY.set_stats(mcp); PHY.computeArea(); - PHY.computeStaticPower(); area.set_area(area.get_area() + PHY.area.get_area()); } + set_area = true; +} + +void MemoryController::computeStaticPower() { + if (!init_params) { + std::cerr << "[ MemoryController ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + if (!set_area) { + std::cerr << "[ MemoryController ] Error: must computeArea before calling " + "computeStaticPower()\n"; + exit(1); + } + transecEngine.computeStaticPower(); + frontend.computeStaticPower(); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + PHY.computeStaticPower(); + } } -void MemoryController::computeEnergy(bool is_tdp) { +void MemoryController::computeDynamicPower() { + if (!init_stats) { + std::cerr << "[ MemoryController ] Error: must set stats before calling " + "computeDynamicPower()\n"; + exit(1); + } frontend.computeDynamicPower(); transecEngine.computeDynamicPower(); + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { PHY.computeDynamicPower(); } - if (is_tdp) { - power = power + frontend.power + transecEngine.power; - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - power = power + PHY.power; - } - } else { - rt_power = rt_power + 
frontend.rt_power + transecEngine.rt_power; - if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { - rt_power = rt_power + PHY.rt_power; - } + + power = frontend.power + transecEngine.power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + power = power + PHY.power; + } + rt_power = frontend.rt_power + transecEngine.rt_power; + if (mcp.type == 0 || (mcp.type == 1 && mcp.withPHY)) { + rt_power = rt_power + PHY.rt_power; } } -void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { +void MemoryController::display(uint32_t indent, bool enable) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; if (enable) { - cout << "Memory Controller:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str - << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate << " W" - << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; + std::cout << "Memory Controller:" << std::endl; + std::cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str + << "Peak Dynamic = " << power.readOp.dynamic * mcp.clockRate + << " W" << std::endl; + std::cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) { - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; + std::cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; } - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str - << "Runtime Dynamic = " << rt_power.readOp.dynamic / mcp.executionTime - << " W" << endl; - cout << endl; + std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str << "Runtime Dynamic = " + << rt_power.readOp.dynamic / mcp.executionTime << " W" + << std::endl; + std::cout << std::endl; frontend.display(indent, true); transecEngine.display(indent, true); @@ -160,7 +208,7 @@ void MemoryController::displayEnergy(uint32_t indent, int plevel, bool enable) { } } -void MemoryController::set_mc_param() { +void MemoryController::set_mc_param(const ParseXML *XML) { if (mc_type == MC) { mcp.clockRate = XML->sys.mc.mc_clock * 2; // DDR double pumped mcp.clockRate *= 1e6; @@ -236,16 +284,14 @@ void MemoryController::set_mc_param() { // XML->sys.flashc.LVDS; mcp.type = XML->sys.flashc.type; // } else { - cout << "Unknown memory controller type: neither DRAM controller nor Flash " - "controller" - << endl; - exit(0); + std::cerr << "[ MemoryController ] Unknown memory controller type: neither " + "DRAM controller nor Flash " + "controller" + << std::endl; + exit(1); } } MemoryController ::~MemoryController() { - if (pipeLogic) { - delete pipeLogic; - pipeLogic = 0; - } + // Do Nothing } diff --git a/src/memoryctrl/memoryctrl.h b/src/memoryctrl/memoryctrl.h index d13be52..18907e1 100644 --- a/src/memoryctrl/memoryctrl.h +++ b/src/memoryctrl/memoryctrl.h @@ -45,22 +45,32 @@ class MemoryController : public Component { public: - ParseXML *XML; InputParameter interface_ip; enum MemoryCtrl_type mc_type; MCParam mcp; MCFrontEnd frontend; MCBackend transecEngine; MCPHY PHY; - Pipeline *pipeLogic; // clock_network clockNetwork; - MemoryController(ParseXML *XML_interface, - InputParameter 
*interface_ip_, - enum MemoryCtrl_type mc_type_); - void set_mc_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool enable = true); + MemoryController(); + void set_params(const ParseXML *XML, + InputParameter *interface_ip_, + enum MemoryCtrl_type mc_type_); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); ~MemoryController(); + +private: + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; + bool set_area; + + void set_mc_param(const ParseXML *XML); }; #endif /* __MEMORYCTRL_H__ */ diff --git a/src/processor.cc b/src/processor.cc index 3bb86bf..0d6d7b0 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -46,8 +46,7 @@ #include Processor::Processor(ParseXML *XML_interface) - : XML(XML_interface), // TODO: using one global copy may have problems. - mc(nullptr) { + : XML(XML_interface) { // TODO: using one global copy may have problems. 
/* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory @@ -315,26 +314,29 @@ Processor::Processor(ParseXML *XML_interface) } if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - mc = new MemoryController(XML, &interface_ip, MC); - mc->computeEnergy(); - mc->computeEnergy(false); + mc.set_params(XML, &interface_ip, MC); + mc.computeArea(); mcs.area.set_area(mcs.area.get_area() + - mc->area.get_area() * XML->sys.mc.number_mcs); + mc.area.get_area() * XML->sys.mc.number_mcs); area.set_area(area.get_area() + - mc->area.get_area() * XML->sys.mc.number_mcs); + mc.area.get_area() * XML->sys.mc.number_mcs); + + mc.computeStaticPower(); + mc.set_stats(XML); + mc.computeDynamicPower(); set_pppm(pppm_t, - XML->sys.mc.number_mcs * mc->mcp.clockRate, + XML->sys.mc.number_mcs * mc.mcp.clockRate, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs); - mcs.power = mc->power * pppm_t; + mcs.power = mc.power * pppm_t; power = power + mcs.power; set_pppm(pppm_t, - 1 / mc->mcp.executionTime, + 1 / mc.mcp.executionTime, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs); - mcs.rt_power = mc->rt_power * pppm_t; + mcs.rt_power = mc.rt_power * pppm_t; rt_power = rt_power + mcs.rt_power; } @@ -925,7 +927,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - mc->displayEnergy(indent + 4); + mc.display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1111,8 +1113,4 @@ Processor::~Processor() { delete l2dirarray.back(); l2dirarray.pop_back(); } - if (mc) { - delete mc; - mc = nullptr; - } }; diff --git a/src/processor.h b/src/processor.h index 41eb743..416d4d4 100644 --- a/src/processor.h +++ b/src/processor.h @@ -58,7 +58,7 @@ class 
Processor : public Component { vector l1dirarray; vector l2dirarray; vector nocs; - MemoryController *mc; + MemoryController mc; NIUController niu; PCIeController pcie; FlashController flashcontroller; From 27d6079beee9f10740933906c3c0e9f343246f26 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 11 Jun 2020 00:35:52 -0500 Subject: [PATCH 17/59] refactor: Cache Split apart the cache files, ready to begin transforming code. --- src/CMakeLists.txt | 5 ++- src/array.h | 44 -------------------------- src/cache/CMakeLists.txt | 10 ++++++ src/cache/datacache.cc | 16 ++++++++++ src/cache/datacache.h | 54 ++++++++++++++++++++++++++++++++ src/cache/instcache.cc | 30 ++++++++++++++++++ src/cache/instcache.h | 56 ++++++++++++++++++++++++++++++++++ src/{ => cache}/sharedcache.cc | 0 src/{ => cache}/sharedcache.h | 6 ++-- 9 files changed, 172 insertions(+), 49 deletions(-) create mode 100644 src/cache/CMakeLists.txt create mode 100644 src/cache/datacache.cc create mode 100644 src/cache/datacache.h create mode 100644 src/cache/instcache.cc create mode 100644 src/cache/instcache.h rename src/{ => cache}/sharedcache.cc (100%) rename src/{ => cache}/sharedcache.h (97%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3b97b8f..d07c2ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(cacti) +add_subdirectory(cache) add_subdirectory(iocontrollers) add_subdirectory(memoryctrl) @@ -21,8 +22,6 @@ add_library(top options.cc processor.h processor.cc - sharedcache.h - sharedcache.cc version.h xmlParser.h xmlParser.cc @@ -30,7 +29,7 @@ add_library(top XML_Parse.cc ) target_include_directories(top PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(top LINK_PUBLIC cacti iocontrollers memoryctrl) +target_link_libraries(top LINK_PUBLIC cacti iocontrollers memoryctrl cache) add_executable(mcpat main.cc) diff --git a/src/array.h b/src/array.h index 6873cf0..68e4bad 100644 --- a/src/array.h +++ b/src/array.h @@ -81,49 +81,5 @@ class ArrayST 
: public Component { void leakage_feedback(double temperature); }; -class InstCache : public Component { -public: - ArrayST *caches; - ArrayST *missb; - ArrayST *ifb; - ArrayST *prefetchb; - powerDef power_t; // temp value holder for both (max) power and runtime power - InstCache() { - caches = 0; - missb = 0; - ifb = 0; - prefetchb = 0; - }; - ~InstCache() { - if (caches) { // caches->local_result.cleanup(); - delete caches; - caches = 0; - } - if (missb) { // missb->local_result.cleanup(); - delete missb; - missb = 0; - } - if (ifb) { // ifb->local_result.cleanup(); - delete ifb; - ifb = 0; - } - if (prefetchb) { // prefetchb->local_result.cleanup(); - delete prefetchb; - prefetchb = 0; - } - }; -}; - -class DataCache : public InstCache { -public: - ArrayST *wbb; - DataCache() { wbb = 0; }; - ~DataCache() { - if (wbb) { // wbb->local_result.cleanup(); - delete wbb; - wbb = 0; - } - }; -}; #endif /* __ARRAY_H__ */ diff --git a/src/cache/CMakeLists.txt b/src/cache/CMakeLists.txt new file mode 100644 index 0000000..8b954aa --- /dev/null +++ b/src/cache/CMakeLists.txt @@ -0,0 +1,10 @@ +add_library(cache + datacache.h + datacache.cc + instcache.h + instcache.cc + sharedcache.h + sharedcache.cc +) +target_include_directories(cache PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(cache LINK_PUBLIC cacti top) diff --git a/src/cache/datacache.cc b/src/cache/datacache.cc new file mode 100644 index 0000000..e9f6195 --- /dev/null +++ b/src/cache/datacache.cc @@ -0,0 +1,16 @@ +#include "datacache.h" + +#include +#include + +DataCache::DataCache() { + wbb = nullptr; +}; + +DataCache::~DataCache() { + if (wbb) { + // wbb->local_result.cleanup(); + delete wbb; + wbb = 0; + } +}; diff --git a/src/cache/datacache.h b/src/cache/datacache.h new file mode 100644 index 0000000..8977030 --- /dev/null +++ b/src/cache/datacache.h @@ -0,0 +1,54 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * 
Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __DATACACHE_H__ +#define __DATACACHE_H__ + +#include "array.h" +#include "instcache.h" +#include "basic_components.h" +#include "cacti_interface.h" +#include "component.h" +#include "const.h" +#include "parameter.h" + +#include +#include + +class DataCache : public InstCache { +public: + ArrayST *wbb; + DataCache(); + ~DataCache(); +}; + +#endif // __DATACACHE_H__ + diff --git a/src/cache/instcache.cc b/src/cache/instcache.cc new file mode 100644 index 0000000..ac1acbd --- /dev/null +++ b/src/cache/instcache.cc @@ -0,0 +1,30 @@ +#include "instcache.h" + +#include +#include + +InstCache::InstCache() { + caches = nullptr; + missb = nullptr; + ifb = nullptr; + prefetchb = nullptr; +}; + +InstCache::~InstCache() { + if (caches) { // caches->local_result.cleanup(); + delete caches; + caches = 0; + } + if (missb) { // missb->local_result.cleanup(); + delete missb; + missb = 0; + } + if (ifb) { // ifb->local_result.cleanup(); + delete ifb; + ifb = 0; + } + if (prefetchb) { // prefetchb->local_result.cleanup(); + delete prefetchb; + prefetchb = 0; + } +}; diff --git a/src/cache/instcache.h b/src/cache/instcache.h new file mode 100644 index 0000000..304e6bd --- /dev/null +++ b/src/cache/instcache.h @@ -0,0 +1,56 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 
2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __INSTCACHE_H__ +#define __INSTCACHE_H__ + +#include "array.h" +#include "basic_components.h" +#include "cacti_interface.h" +#include "component.h" +#include "const.h" +#include "parameter.h" + +#include +#include + +class InstCache : public Component { +public: + ArrayST *caches; + ArrayST *missb; + ArrayST *ifb; + ArrayST *prefetchb; + powerDef power_t; // temp value holder for both (max) power and runtime power + InstCache(); + ~InstCache(); +}; + +#endif // __INSTCACHE_H__ diff --git a/src/sharedcache.cc b/src/cache/sharedcache.cc similarity index 100% rename from src/sharedcache.cc rename to src/cache/sharedcache.cc diff --git a/src/sharedcache.h b/src/cache/sharedcache.h similarity index 97% rename from src/sharedcache.h rename to src/cache/sharedcache.h index 6dc3648..7aaa4ec 100644 --- a/src/sharedcache.h +++ b/src/cache/sharedcache.h @@ -29,12 +29,14 @@ * ***************************************************************************/ -#ifndef SHAREDCACHE_H_ -#define SHAREDCACHE_H_ +#ifndef __SHAREDCACHE_H__ +#define __SHAREDCACHE_H__ + #include "XML_Parse.h" #include "area.h" #include "array.h" #include "basic_components.h" +#include "datacache.h" #include "logic.h" #include "parameter.h" From 876b2430ef064b0ea4dfba0b8263327940cd136a Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 11 Jun 2020 01:30:02 -0500 Subject: [PATCH 18/59] 
refactor: Split Core Source Split apart the core source so that it will be easier to work on. --- src/CMakeLists.txt | 11 +- src/array.h | 1 - src/cache/datacache.cc | 6 +- src/cache/datacache.h | 3 +- src/core.cc | 6106 ---------------------------------- src/core.h | 282 -- src/core/CMakeLists.txt | 22 + src/core/branch_predictor.cc | 520 +++ src/core/branch_predictor.h | 71 + src/core/core.cc | 768 +++++ src/core/core.h | 74 + src/core/exec_unit.cc | 670 ++++ src/core/exec_unit.h | 82 + src/core/instfetch.cc | 820 +++++ src/core/instfetch.h | 75 + src/core/loadstore.cc | 747 +++++ src/core/loadstore.h | 72 + src/core/mmu.cc | 287 ++ src/core/mmu.h | 67 + src/core/regfile.cc | 439 +++ src/core/regfile.h | 70 + src/core/renaming_unit.cc | 1412 ++++++++ src/core/renaming_unit.h | 71 + src/core/scheduler.cc | 795 +++++ src/core/scheduler.h | 72 + 25 files changed, 7145 insertions(+), 6398 deletions(-) delete mode 100644 src/core.cc delete mode 100644 src/core.h create mode 100644 src/core/CMakeLists.txt create mode 100644 src/core/branch_predictor.cc create mode 100644 src/core/branch_predictor.h create mode 100644 src/core/core.cc create mode 100644 src/core/core.h create mode 100644 src/core/exec_unit.cc create mode 100644 src/core/exec_unit.h create mode 100644 src/core/instfetch.cc create mode 100644 src/core/instfetch.h create mode 100644 src/core/loadstore.cc create mode 100644 src/core/loadstore.h create mode 100644 src/core/mmu.cc create mode 100644 src/core/mmu.h create mode 100644 src/core/regfile.cc create mode 100644 src/core/regfile.h create mode 100644 src/core/renaming_unit.cc create mode 100644 src/core/renaming_unit.h create mode 100644 src/core/scheduler.cc create mode 100644 src/core/scheduler.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d07c2ac..f519f6d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(cacti) add_subdirectory(cache) +add_subdirectory(core) add_subdirectory(iocontrollers) 
add_subdirectory(memoryctrl) @@ -9,8 +10,6 @@ add_library(top array.cc basic_components.h basic_components.cc - core.h - core.cc globalvar.h interconnect.h interconnect.cc @@ -29,7 +28,13 @@ add_library(top XML_Parse.cc ) target_include_directories(top PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(top LINK_PUBLIC cacti iocontrollers memoryctrl cache) +target_link_libraries(top + LINK_PUBLIC + cache + cacti + core + iocontrollers + memoryctrl) add_executable(mcpat main.cc) diff --git a/src/array.h b/src/array.h index 68e4bad..1d5b621 100644 --- a/src/array.h +++ b/src/array.h @@ -81,5 +81,4 @@ class ArrayST : public Component { void leakage_feedback(double temperature); }; - #endif /* __ARRAY_H__ */ diff --git a/src/cache/datacache.cc b/src/cache/datacache.cc index e9f6195..76194fe 100644 --- a/src/cache/datacache.cc +++ b/src/cache/datacache.cc @@ -3,12 +3,10 @@ #include #include -DataCache::DataCache() { - wbb = nullptr; -}; +DataCache::DataCache() { wbb = nullptr; }; DataCache::~DataCache() { - if (wbb) { + if (wbb) { // wbb->local_result.cleanup(); delete wbb; wbb = 0; diff --git a/src/cache/datacache.h b/src/cache/datacache.h index 8977030..035e59f 100644 --- a/src/cache/datacache.h +++ b/src/cache/datacache.h @@ -33,11 +33,11 @@ #define __DATACACHE_H__ #include "array.h" -#include "instcache.h" #include "basic_components.h" #include "cacti_interface.h" #include "component.h" #include "const.h" +#include "instcache.h" #include "parameter.h" #include @@ -51,4 +51,3 @@ class DataCache : public InstCache { }; #endif // __DATACACHE_H__ - diff --git a/src/core.cc b/src/core.cc deleted file mode 100644 index 83d0d27..0000000 --- a/src/core.cc +++ /dev/null @@ -1,6106 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#include "core.h" - -#include "XML_Parse.h" -#include "basic_circuit.h" -#include "const.h" -#include "io.h" -#include "parameter.h" - -#include -#include -#include -#include -#include -//#include "globalvar.h" - -InstFetchU::InstFetchU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), 
ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), IB(0), BTB(0), ID_inst(0), ID_operand(0), ID_misc(0), - exist(exist_) { - if (!exist) - return; - int idx, tag, data, size, line, assoc, banks; - bool debug = false, is_default = true; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; - // Assuming all L1 caches are virtually idxed physically tagged. - // cache - - size = (int)XML->sys.core[ithCore].icache.icache_config[0]; - line = (int)XML->sys.core[ithCore].icache.icache_config[1]; - assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; - banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; - idx = debug ? 9 : int(ceil(log2(size / line / assoc))); - tag = debug ? 51 - : (int)XML->sys.physical_address_width - idx - - int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = - debug ? 32768 : (int)XML->sys.core[ithCore].icache.icache_config[0]; - interface_ip.line_sz = - debug ? 64 : (int)XML->sys.core[ithCore].icache.icache_config[1]; - interface_ip.assoc = - debug ? 8 : (int)XML->sys.core[ithCore].icache.icache_config[2]; - interface_ip.nbanks = - debug ? 1 : (int)XML->sys.core[ithCore].icache.icache_config[3]; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = - 0; // debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; - interface_ip.latency = - debug ? 
3.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // interface_ip.obj_func_dyn_energy = 0; - // interface_ip.obj_func_dyn_power = 0; - // interface_ip.obj_func_leak_power = 0; - // interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, - "icache", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - scktRatio = g_tp.sckt_co_eff; - chip_PR_overhead = g_tp.chip_layout_overhead; - macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area() + - icache.caches->local_result.area); - area.set_area(area.get_area() + icache.caches->local_result.area); - // output_data_csv(icache.caches.local_result); - - /* - *iCache controllers - *miss buffer Each MSHR contains enough state - *to handle one or more accesses of any type to a single memory line. - *Due to the generality of the MSHR mechanism, - *the amount of state involved is non-trivial: - *including the address, pointers to the cache entry and destination register, - *written data, and various other pieces of state. - */ - interface_ip.num_search_ports = - debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + - icache.caches->l_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.core[ithCore].icache.buffer_sizes[0] * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[4] / - clockRate; // means cycle time - interface_ip.latency = debug - ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[5] / - clockRate; // means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = - XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, - "icacheMissBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - icache.area.set_area(icache.area.get_area() + - icache.missb->local_result.area); - area.set_area(area.get_area() + icache.missb->local_result.area); - // output_data_csv(icache.missb.local_result); - - // fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data * XML->sys.core[ithCore].icache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = - XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, - "icacheFillBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); - area.set_area(area.get_area() + icache.ifb->local_result.area); - // output_data_csv(icache.ifb.local_result); - - // prefetch buffer - tag = XML->sys.physical_address_width + - EXTRA_TAG_BITS; // check with previous entries to decide wthether to - // merge. - data = icache.caches->l_ip - .line_sz; // separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = - XML->sys.core[ithCore].icache.buffer_sizes[2] * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = - XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, - "icacheprefetchBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - icache.area.set_area(icache.area.get_area() + - icache.prefetchb->local_result.area); - area.set_area(area.get_area() + icache.prefetchb->local_result.area); - // output_data_csv(icache.prefetchb.local_result); - - // Instruction buffer - data = - XML->sys.core[ithCore].instruction_length * - XML->sys.core[ithCore] - .peak_issue_width; // icache.caches.l_ip.line_sz; //multiple - // threads timing sharing the instruction buffer. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - interface_ip.pure_cam = false; - interface_ip.line_sz = int(ceil(data / 8.0)); - interface_ip.cache_sz = - XML->sys.core[ithCore].number_hardware_threads * - XML->sys.core[ithCore].instruction_buffer_size * - interface_ip.line_sz > - 64 - ? XML->sys.core[ithCore].number_hardware_threads * - XML->sys.core[ithCore].instruction_buffer_size * - interface_ip.line_sz - : 64; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - // NOTE: Assuming IB is time slice shared among threads, every fetch op will - // at least fetch "fetch width" instructions. - interface_ip.num_rw_ports = - debug - ? 
1 - : XML->sys.core[ithCore] - .number_instruction_fetch_ports; // XML->sys.core[ithCore].fetch_width; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, - "InstBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - IB->area.set_area(IB->area.get_area() + IB->local_result.area); - area.set_area(area.get_area() + IB->local_result.area); - // output_data_csv(IB.IB.local_result); - - // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; - // inst_decoder.init_decoder(is_default, &interface_ip); - // inst_decoder.full_decoder_power(); - - if (coredynp.predictionW > 0) { - /* - * BTB branch target buffer, accessed during IF stage. Virtually indexed and - * virtually tagged It is only a cache without all the buffers in the cache - * controller since it is more like a look up table than a cache with cache - * controller. When access miss, no load from other places such as main - * memory (not actively fill the misses), it is passively updated under two - * circumstances: 1) when BPT@ID stage finds out current is a taken branch - * while BTB missed 2) When BPT@ID stage predicts differently than BTB 3) - * When ID stage finds out current instruction is not a branch while BTB had - * a hit.(mark as invalid) 4) when EXEU find out wrong target has been - * provided from BTB. - * - */ - size = XML->sys.core[ithCore].BTB.BTB_config[0]; - line = XML->sys.core[ithCore].BTB.BTB_config[1]; - assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; - banks = XML->sys.core[ithCore].BTB.BTB_config[3]; - idx = debug ? 9 : int(ceil(log2(size / line / assoc))); - // tag = - // debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + - // int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) - // +EXTRA_TAG_BITS; - tag = debug ? 
51 - : XML->sys.virtual_address_width + - int(ceil(log2( - XML->sys.core[ithCore].number_hardware_threads))) + - EXTRA_TAG_BITS; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug ? 32768 : size; - interface_ip.line_sz = debug ? 64 : line; - interface_ip.assoc = debug ? 8 : assoc; - interface_ip.nbanks = debug ? 1 : banks; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = - 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].BTB.BTB_config[4] / clockRate; - interface_ip.latency = - debug ? 3.0 / clockRate - : XML->sys.core[ithCore].BTB.BTB_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - BTB = new ArrayST(&interface_ip, - "Branch Target Buffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); - area.set_area(area.get_area() + BTB->local_result.area); - /// cout<<"area="<area.get_area()); - } - - ID_inst = new inst_decoder(is_default, - &interface_ip, - coredynp.opcode_length, - 1 /*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, - coredynp.core_ty); - - ID_operand = new inst_decoder(is_default, - &interface_ip, - coredynp.arch_ireg_width, - 1, - coredynp.x86, - Core_device, - coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, - &interface_ip, - 8 /* Prefix field etc upto 14B*/, - 1, - coredynp.x86, - Core_device, - coredynp.core_ty); - // TODO: X86 decoder should decode the inst in cyclic mode 
under the control - // of squencer. So the dynamic power should be multiplied by a few times. - area.set_area(area.get_area() + - (ID_inst->area.get_area() + ID_operand->area.get_area() + - ID_misc->area.get_area()) * - coredynp.decodeW); -} - -BranchPredictor::BranchPredictor(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), globalBPT(0), localBPT(0), L1_localBPT(0), - L2_localBPT(0), chooser(0), RAS(0), exist(exist_) { - /* - * Branch Predictor, accessed during ID stage. - * McPAT's branch predictor model is the tournament branch predictor used in - * Alpha 21264, including global predictor, local two level predictor, and - * Chooser. The Branch predictor also includes a RAS (return address stack) - * for function calls Branch predictors are tagged by thread ID and modeled as - * 1-way associative cache. However RAS return address stacks are duplicated - * for each thread. 
- * TODO:Data Width need to be computed more precisely * - */ - if (!exist) - return; - int tag, data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.assoc = 1; - interface_ip.pure_cam = false; - if (coredynp.multithreaded) { - - tag = int(log2(coredynp.num_hthreads) + EXTRA_TAG_BITS); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - } else { - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - } - // Global predictor - data = - int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits / 8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].predictor.global_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, - "Global Predictor", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area() + - globalBPT->local_result.area); - area.set_area(area.get_area() + globalBPT->local_result.area); - - // Local BPT (Level 1) - data = - int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0] / 8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / 
clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, - "L1 local Predictor", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area() + - L1_localBPT->local_result.area); - area.set_area(area.get_area() + L1_localBPT->local_result.area); - - // Local BPT (Level 2) - data = - int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1] / 8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, - "L2 local Predictor", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area() + - L2_localBPT->local_result.area); - area.set_area(area.get_area() + L2_localBPT->local_result.area); - - // Chooser - data = - int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits / 8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].predictor.chooser_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - 
interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, - "Predictor Chooser", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area() + chooser->local_result.area); - area.set_area(area.get_area() + chooser->local_result.area); - - // RAS return address stacks are Duplicated for each thread. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - data = int(ceil(coredynp.pc_width / 8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data * XML->sys.core[ithCore].RAS_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST( - &interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area() + - RAS->local_result.area * coredynp.num_hthreads); - area.set_area(area.get_area() + - RAS->local_result.area * coredynp.num_hthreads); -} - -SchedulerU::SchedulerU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), 
interface_ip(*interface_ip_), - coredynp(dyn_p_), int_inst_window(0), fp_inst_window(0), ROB(0), - instruction_selection(0), exist(exist_) { - if (!exist) - return; - int tag, data; - bool is_default = true; - string tmp_name; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if ((coredynp.core_ty == Inorder && coredynp.multithreaded)) { - // Instruction issue queue, in-order multi-issue or multithreaded processor - // also has this structure. Unified window for Inorder processors - tag = int(log2(XML->sys.core[ithCore].number_hardware_threads) * - coredynp.perThreadState); // This is the normal thread state bits - // based on Niagara Design - data = XML->sys.core[ithCore].instruction_length; - // NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and - // IA-32 Architectures Software Developer’s Manual - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = int(ceil(data / 8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = - XML->sys.core[ithCore].instruction_window_size * interface_ip.line_sz > - 64 - ? 
XML->sys.core[ithCore].instruction_window_size * - interface_ip.line_sz - : 64; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, - "InstFetchQueue", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area() + - int_inst_window->local_result.area * - coredynp.num_pipelines); - area.set_area(area.get_area() + - int_inst_window->local_result.area * coredynp.num_pipelines); - // output_data_csv(iRS.RS.local_result); - Iw_height = int_inst_window->local_result.cache_ht; - - /* - * selection logic - * In a single-issue Inorder multithreaded processor like Niagara, issue - * width=1*number_of_threads since the processor does need to pick up - * instructions from multiple ready ones(although these ready ones are from - * different threads).While SMT processors do not distinguish which thread - * belongs to who at the issue stage. 
- */ - interface_ip.assoc = - 1; // reset to prevent unnecessary warning messages when init_interface - instruction_selection = new selection_logic( - is_default, - XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW * XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, - Core_device, - coredynp.core_ty); - } - - if (coredynp.core_ty == OOO) { - /* - * CAM based instruction window - * For physicalRegFilebased OOO it is the instruction issue queue, where - * only tags of phy regs are stored For RS based OOO it is the Reservation - * station, where both tags and values of phy regs are stored It is written - * once and read twice(two operands) before an instruction can be issued. - * X86 instruction can be very long up to 15B. add instruction length in XML - */ - if (coredynp.scheu_ty == PhysicalRegFile) { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be - // stored. This underestimate the search power - data = - int((ceil((coredynp.instruction_length + - 2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width)) / - 2.0) / - 8.0)); - // Data width being divided by 2 means only after both operands available - // the whole data will be read out. This is modeled using two equivalent - // readouts with half of the data width - tmp_name = "InstIssueQueue"; - } else { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be - // stored. This underestimate the search power - data = - int(ceil(((coredynp.instruction_length + - 2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width) + - 2 * coredynp.int_data_width) / - 2.0) / - 8.0)); - // Data width being divided by 2 means only after both operands available - // the whole data will be read out. 
This is modeled using two equivalent - // readouts with half of the data width - - tmp_name = "IntReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 2 * 1.0 / clockRate; - interface_ip.latency = 2 * 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, - tmp_name, - Core_device, - coredynp.opt_local, - coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area() + - int_inst_window->local_result.area * - coredynp.num_pipelines); - area.set_area(area.get_area() + - int_inst_window->local_result.area * coredynp.num_pipelines); - Iw_height = int_inst_window->local_result.cache_ht; - // FU inst window - if (coredynp.scheu_ty == PhysicalRegFile) { - tag = 2 * coredynp.phy_freg_width; // TODO: each time only half of the tag - // is compared - data = - int(ceil((coredynp.instruction_length + - 2 * (coredynp.phy_freg_width - coredynp.arch_freg_width)) / - 8.0)); - tmp_name = "FPIssueQueue"; - } else { - tag = 2 * coredynp.phy_ireg_width; - data = - int(ceil((coredynp.instruction_length + - 2 * (coredynp.phy_freg_width - coredynp.arch_freg_width) + - 2 * coredynp.fp_data_width) / - 8.0)); - tmp_name = "FPReservationStation"; - } - interface_ip.is_cache = true; - 
interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].fp_instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_issueW; - interface_ip.num_wr_ports = coredynp.fp_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, - tmp_name, - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area() + - fp_inst_window->local_result.area * - coredynp.num_fp_pipelines); - area.set_area(area.get_area() + fp_inst_window->local_result.area * - coredynp.num_fp_pipelines); - fp_Iw_height = fp_inst_window->local_result.cache_ht; - - if (XML->sys.core[ithCore].ROB_size > 0) { - /* - * if ROB_size = 0, then the target processor does not support - *hardware-based speculation, i.e. , the processor allow OOO issue as well - *as OOO completion, which means branch must be resolved before - *instruction issued into instruction window, since there is no change to - *flush miss-predict branch path after instructions are issued in this - *situation. - * - * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. - * One old approach is to combine the RAT and ROB as a huge CAM structure - *as in AMD K7. However, this approach is abandoned due to its high power - *and poor scalability. McPAT uses current implementation of ROB as - *circular buffer. 
ROB is written once when instruction is issued and read - *once when the instruction is committed. * - */ - - int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); - data = int(ceil( - (robExtra + coredynp.pc_width + - ((coredynp.rm_ty == RAMbased) - ? (coredynp.phy_ireg_width + coredynp.phy_freg_width) - : fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width)) + - ((coredynp.scheu_ty == PhysicalRegFile) ? 0 - : coredynp.fp_data_width)) / - 8.0)); - /* - * 5 bits are: busy, Issued, Finished, speculative, valid; - * PC is to id the instruction for recover - * exception/mis-prediction. When using RAM-based RAT, ROB needs to - * contain the ARF-PRF mapping to index the correct entry in the RAT, so - * that the correct architecture register (and freelist) can be found and - * the RAT can be appropriately updated; otherwise, the RAM-based RAT - * needs to support search ops to identify the target architecture - * register that needs to be updated, or the physical resigner that needs - * to be recycled; When using CAM-based RAT, ROB only needs to contain - * destination physical register since the CAM-base RAT can search for the - * corresponding ARF-PRF mapping to find the correct entry in the RAT, so - * that the correct architecture register (and freelist/bits) can be found - * and the RAT can be appropriately updated. ROB phy_reg entry should use - * the larger one from phy_ireg and phy_freg; fdata_width is always - * larger. Latest Intel Processors may have different ROB/RS designs. - */ - - /* - if(coredynp.scheu_ty==PhysicalRegFile) - { - //PC is to id the instruction for recover - exception. - //inst is used to map the renamed dest. 
- registers.so that commit stage can know which reg/RRAT to update - // data = - int(ceil((robExtra+coredynp.pc_width - + - // coredynp.instruction_length - + 2*coredynp.phy_ireg_width)/8.0)); - - if (coredynp.rm_ty ==RAMbased) - { - data = int(ceil((robExtra + - coredynp.pc_width + (coredynp.phy_ireg_width, - coredynp.phy_freg_width))/8.0)); - //When using RAM-based RAT, ROB - needs to contain the ARF-PRF mapping to index the correct entry in the - RAT, - //so that the correct architecture - register (and freelist) can be found and the RAT can be appropriately - updated. - } - else if ((coredynp.rm_ty ==CAMbased)) - { - data = - int(ceil((robExtra+coredynp.pc_width + fmax(coredynp.phy_ireg_width, - coredynp.phy_freg_width))/8.0)); - //When using CAM-based RAT, ROB - needs to contain the ARF-PRF mapping to index the correct entry in the - RAT, - //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated. - } - } - else - { - //in RS based OOO, ROB also contains value - of destination reg - // data = - int(ceil((robExtra+coredynp.pc_width - + - // coredynp.instruction_length + 2*coredynp.phy_ireg_width - + coredynp.fp_data_width)/8.0)); - - //using phy_reg number to search in the - RAT, the correct architecture register can be found and the RAT can be - appropriately updated. - //ROB phy_reg entry should use the larger - one from ireg and freg; fdata_width is always larger; Latest Intel - Processors may have different ROB/RS designs. 
data = int(ceil((robExtra + - coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width) - + coredynp.fp_data_width)/8.0)); - } - */ - - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore] - .ROB_size; // The XML ROB size is for all threads - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_commitW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, - "ReorderBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area() + - ROB->local_result.area * coredynp.num_pipelines); - area.set_area(area.get_area() + - ROB->local_result.area * coredynp.num_pipelines); - ROB_height = ROB->local_result.cache_ht; - } - - instruction_selection = - new selection_logic(is_default, - XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, - &interface_ip, - Core_device, - coredynp.core_ty); - } -} - -LoadStoreU::LoadStoreU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), LSQ(0), LoadQ(0), exist(exist_) { - if (!exist) - return; - int idx, tag, data, size, line, assoc, banks; - bool debug = false; - int ldst_opcode = XML->sys.core[ithCore].opcode_width; // 16; - - clockRate = coredynp.clockRate; - executionTime = 
coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; - - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // Dcache - size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; - assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - idx = debug ? 9 : int(ceil(log2(size / line / assoc))); - tag = debug ? 51 - : XML->sys.physical_address_width - idx - int(ceil(log2(line))) + - EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = - debug ? 32768 : (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - interface_ip.line_sz = - debug ? 64 : (int)XML->sys.core[ithCore].dcache.dcache_config[1]; - interface_ip.assoc = - debug ? 8 : (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - interface_ip.nbanks = - debug ? 1 : (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = - 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 3.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - debug - ? 1 - : XML->sys.core[ithCore] - .memory_ports; // usually In-order has 1 and OOO has 2 at least. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, - "dcache", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.caches->local_result.area); - area.set_area(area.get_area() + dcache.caches->local_result.area); - // output_data_csv(dcache.caches.local_result); - - // dCache controllers - // miss buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + - dcache.caches->l_ip.line_sz * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.core[ithCore].dcache.buffer_sizes[0] * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug ? 
1 : XML->sys.core[ithCore].memory_ports; - ; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, - "dcacheMissBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.missb->local_result.area); - area.set_area(area.get_area() + dcache.missb->local_result.area); - // output_data_csv(dcache.missb.local_result); - - // fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data * XML->sys.core[ithCore].dcache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; - ; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, - "dcacheFillBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + dcache.ifb->local_result.area); - area.set_area(area.get_area() + dcache.ifb->local_result.area); - // output_data_csv(dcache.ifb.local_result); - - // prefetch buffer - tag = XML->sys.physical_address_width + - EXTRA_TAG_BITS; // check with previous entries to decide wthether to - // merge. 
- data = dcache.caches->l_ip - .line_sz; // separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = - XML->sys.core[ithCore].dcache.buffer_sizes[2] * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; - ; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, - "dcacheprefetchBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.prefetchb->local_result.area); - area.set_area(area.get_area() + dcache.prefetchb->local_result.area); - // output_data_csv(dcache.prefetchb.local_result); - - // WBB - - if (cache_p == Write_back) { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = - XML->sys.core[ithCore].dcache.buffer_sizes[3] * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 
1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, - "dcacheWBB", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.wbb->local_result.area); - area.set_area(area.get_area() + dcache.wbb->local_result.area); - // output_data_csv(dcache.wbb.local_result); - } - - /* - * LSU--in-order processors do not have separate load queue: unified lsq - * partitioned among threads - * it is actually the store queue but for inorder processors it serves as both - * loadQ and StoreQ - */ - tag = ldst_opcode + XML->sys.virtual_address_width + - int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + - EXTRA_TAG_BITS; - data = XML->sys.machine_bits; - interface_ip.is_cache = true; - interface_ip.line_sz = int(ceil(data / 32.0)) * 4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = - XML->sys.core[ithCore].store_buffer_size * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; 
- LSQ = new ArrayST(&interface_ip, - "Load(Store)Queue", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area() + LSQ->local_result.area); - area.set_area(area.get_area() + LSQ->local_result.area); - // output_data_csv(LSQ.LSQ.local_result); - lsq_height = - LSQ->local_result.cache_ht * - sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ - - if ((coredynp.core_ty == OOO) && - (XML->sys.core[ithCore].load_buffer_size > 0)) { - interface_ip.line_sz = int(ceil(data / 32.0)) * 4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = - XML->sys.core[ithCore].load_buffer_size * interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, - "LoadQueue", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); - area.set_area(area.get_area() + LoadQ->local_result.area); - // output_data_csv(LoadQ.LoadQ.local_result); - lsq_height = - (LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht) * - sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ - } - area.set_area(area.get_area() * cdb_overhead); -} - -MemManU::MemManU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : 
XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), itlb(0), dtlb(0), exist(exist_) { - if (!exist) - return; - int tag, data; - bool debug = false; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.specific_tag = 1; - // Itlb TLBs are partioned among threads according to Nigara and Nehalem - tag = XML->sys.virtual_address_width - - int(floor(log2(XML->sys.virtual_memory_page_size))) + - int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + - EXTRA_TAG_BITS; - data = XML->sys.physical_address_width - - int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.core[ithCore].itlb.number_entries * - interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; - interface_ip.latency = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = - debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = - debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST( - &interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area); - area.set_area(area.get_area() + itlb->local_result.area); - // output_data_csv(itlb.tlb.local_result); - - // dtlb - tag = XML->sys.virtual_address_width - - int(floor(log2(XML->sys.virtual_memory_page_size))) + - int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + - EXTRA_TAG_BITS; - data = XML->sys.physical_address_width - - int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.core[ithCore].dtlb.number_entries * - interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 0; - interface_ip.throughput = - debug ? 1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; - interface_ip.latency = - debug ? 
1.0 / clockRate - : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST( - &interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area); - area.set_area(area.get_area() + dtlb->local_result.area); - // output_data_csv(dtlb.tlb.local_result); -} - -RegFU::RegFU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), IRF(0), FRF(0), RFWIN(0), exist(exist_) { - /* - * processors have separate architectural register files for each thread. - * therefore, the bypass buses need to travel across all the register files. 
- */ - - if (!exist) - return; - int data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - //**********************************IRF*************************************** - data = coredynp.int_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data / 32.0)) * 4; - interface_ip.cache_sz = coredynp.num_IRF_entry * interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - 1; // this is the transfer port for saving/restoring states when - // exceptions happen. - interface_ip.num_rd_ports = 2 * coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, - "Integer Register File", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area() + - IRF->local_result.area * coredynp.num_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); - area.set_area(area.get_area() + - IRF->local_result.area * coredynp.num_pipelines * cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? 
XML->sys.core[ithCore].number_hardware_threads - : 1)); - // area.set_area(area.get_area()*cdb_overhead); - // output_data_csv(IRF.RF.local_result); - - //**********************************FRF*************************************** - data = coredynp.fp_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data / 32.0)) * 4; - interface_ip.cache_sz = coredynp.num_FRF_entry * interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - 1; // this is the transfer port for saving/restoring states when - // exceptions happen. - interface_ip.num_rd_ports = 2 * XML->sys.core[ithCore].issue_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; - interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, - "Floating point Register File", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area() + - FRF->local_result.area * coredynp.num_fp_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); - area.set_area(area.get_area() + - FRF->local_result.area * coredynp.num_fp_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); - // area.set_area(area.get_area()*cdb_overhead); - // output_data_csv(FRF.RF.local_result); - int_regfile_height = IRF->local_result.cache_ht * - ((coredynp.scheu_ty == ReservationStation) - ? 
XML->sys.core[ithCore].number_hardware_threads - : 1) * - sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1) * - sqrt(cdb_overhead); - // since a EXU is associated with each pipeline, the cdb should not have - // longer length. - if (coredynp.regWindowing) { - //*********************************REG_WIN************************************ - data = - coredynp - .int_data_width; // ECC, and usually 2 regs are transfered together - // during window shifting.Niagara Mega cell - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data / 8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size * - IRF->l_ip.cache_sz * - XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 4.0 / clockRate; - interface_ip.latency = 4.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = - 1; // this is the transfer port for saving/restoring states when - // exceptions happen. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, - "RegWindow", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area() + - RFWIN->local_result.area * coredynp.num_pipelines); - area.set_area(area.get_area() + - RFWIN->local_result.area * coredynp.num_pipelines); - // output_data_csv(RFWIN.RF.local_result); - } -} - -EXECU::EXECU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - double lsq_height_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), fp_u(0), - exeu(0), mul(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), - intTag_mul_Bypass(0), fp_bypass(0), fpTagBypass(0), exist(exist_) { - bool exist_flag = true; - if (!exist) - return; - double fu_height = 0.0; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - rfu = new RegFU(XML, ithCore, &interface_ip, coredynp); - scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); - exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); - area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + - scheu->area.get_area()); - fu_height = exeu->FU_height; - if (coredynp.num_fpus > 0) { - fp_u = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, FPU); - area.set_area(area.get_area() + fp_u->area.get_area()); - } - if (coredynp.num_muls > 0) { - mul = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, MUL); - area.set_area(area.get_area() + mul->area.get_area()); - fu_height += mul->FU_height; - } - /* - * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; - * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. 
on the - * same bus there are multiple tri-state drivers and muxes that go to - * different components on the same bus - */ - if (XML->sys.Embedded) { - interface_ip.wt = Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - } else { - interface_ip.wt = Global; - interface_ip.wire_is_mat_type = - 2; // start from semi-global since local wires are already used - interface_ip.wire_os_mat_type = 2; - interface_ip.throughput = 10.0 / clockRate; // Do not care - interface_ip.latency = 10.0 / clockRate; - } - - if (coredynp.core_ty == Inorder) { - int_bypass = - new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + - lsq_height + scheu->Iw_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); - - if (coredynp.num_muls > 0) { - int_mul_bypass = - new interconnect("Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); - intTag_mul_Bypass = - new interconnect("Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height + scheu->Iw_height, - &interface_ip, - 3, - 
false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus > 0) { - fp_bypass = - new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + fp_u->FU_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + - lsq_height + scheu->Iw_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); - } - } else { // OOO - if (coredynp.scheu_ty == PhysicalRegFile) { - /* For physical register based OOO, - * data broadcast interconnects cover across functional units, lsq, inst - * windows and register files, while tag broadcast interconnects also - * cover across ROB - */ - int_bypass = new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + - lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + - exeu->FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); - - if (coredynp.num_muls > 0) { - int_mul_bypass = - new interconnect("Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - 
rfu->int_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - fpTagBypass = new interconnect( - "FP Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); - } - } else { - /* - * In RS based processor both data and tag are broadcast together, - * covering functional units, lsq, nst windows, register files, and ROBs - */ - int_bypass = new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + - lsq_height + scheu->Iw_height + - scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + - exeu->FU_height + lsq_height + - 
scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); - if (coredynp.num_muls > 0) { - int_mul_bypass = new interconnect( - "Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + - lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + - lsq_height + scheu->fp_Iw_height + - scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - fpTagBypass = new interconnect( - "FP Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); - } - } - } - area.set_area(area.get_area() + bypass.area.get_area()); -} - -RENAMINGU::RENAMINGU(ParseXML 
*XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), iFRAT(0), fFRAT(0), iRRAT(0), fRRAT(0), ifreeL(0), - ffreeL(0), idcl(0), fdcl(0), RAHT(0), exist(exist_) { - /* - * Although renaming logic maybe be used in in-order processors, -* McPAT assumes no renaming logic is used since the performance gain is very -limited and -* the only major inorder processor with renaming logic is Itainium -* that is a VLIW processor and different from current McPAT's model. - * physical register base OOO must have Dual-RAT architecture or equivalent -structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but -only update when instruction retires. - * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all -when context switch - * - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * RAM-based RAT is duplicated/partitioned for each different hardware threads - * CAM-based RAT is shared for all hardware threads - * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N -(N-way SMT) sets of entries, with each set for a thread. - * The RAT control logic will determine different sets to use for different -threads. But it does not need extra tag bits in the entries. - * However, CAM-based RAT need extra tag bits to distinguish the architecture -register ids for different threads. - - * - * checkpointing of RAT and RRAT are both for architecture state recovery with -events including mis-speculation; - * Checkpointing is easier to implement in CAM than in RAM based RAT, despite -of the inferior scalabilty of the CAM-based RATs. 
- * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 -checkpoints (based on MIPS designs) for RAM based RATs, - * thus CAM-based RAT does not need RRAT - * Although no Dual-RAT is needed in RS-based OOO processors, since archi -RegFile contains the committed register values, - * a RRAT or GC (not both) will speedup the mis-speculation recovery. Thus, -when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT. - * - * RAM-base RAT does not need to scan/search all contents during instruction -commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is -used for index the RAT entry to be updated. - * - * Both RAM and CAM have same DCL - * - - * - */ - if (!exist) - return; - int tag, data, out_w; - // interface_ip.wire_is_mat_type = 0; - // interface_ip.wire_os_mat_type = 0; - // interface_ip.wt = Global_30; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if (coredynp.core_ty == OOO) { - // integer pipeline - if (coredynp.scheu_ty == PhysicalRegFile) { - if (coredynp.rm_ty == - RAMbased) { // FRAT with global checkpointing (GCs) please see paper - // tech report for detailed explanation. 
- data = int(ceil(coredynp.phy_ireg_width * - (1 + coredynp.globalCheckpoint) / 8.0)); // 33; - out_w = int(ceil(coredynp.phy_ireg_width / 8.0)); // bytes - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * - XML->sys.core[ithCore].archi_Regs_IRF_size * - XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // the extra one port is for GCs - interface_ip.num_rd_ports = 2 * coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - - // FRAT floating point - data = int(ceil(coredynp.phy_freg_width * - (1 + coredynp.globalCheckpoint) / 8.0)); - out_w = int(ceil(coredynp.phy_freg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * - XML->sys.core[ithCore].archi_Regs_FRF_size * - XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - 
interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // the extra one port is for GCs - interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); - - } else if (coredynp.rm_ty == CAMbased) { - // FRAT - tag = coredynp.arch_ireg_width + coredynp.hthread_width; - data = int( - ceil((coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / - 8.0)); // each checkpoint in the CAM-based RAT design needs - // only 1 bit, see "a power-aware hybrid ram-cam - // renaming mechanism for fast recovery" - out_w = int(ceil(coredynp.arch_ireg_width / 8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); 
- - // FRAT for FP - tag = coredynp.arch_freg_width + coredynp.hthread_width; - data = int( - ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / - 8.0)); // each checkpoint in the CAM-based RAT design needs - // only 1 bit, see "a power-aware hybrid ram-cam - // renaming mechanism for fast recovery" - out_w = int(ceil(coredynp.arch_freg_width / 8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // for GCs - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); - } - - // RRAT is always RAM based, does not have GCs, and is used only for - // record latest non-speculative mapping RRAT is not needed for CAM-based - // RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is - // not needed for RAM-based RAT with checkpoints McPAT assumes renaming - // unit to have RRAT when there is no checkpoints in FRAT, while MIPS - // R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with - // FRAT is very costly, especially for high issue width and high 
clock - // rate. - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - data = int(ceil(coredynp.phy_ireg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * - XML->sys.core[ithCore] - .number_hardware_threads; // HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, - "Int RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); - area.set_area(area.get_area() + iRRAT->area.get_area()); - - // RRAT for FP - data = int(ceil(coredynp.phy_freg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * - XML->sys.core[ithCore] - .number_hardware_threads; // HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - 
interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, - "FP RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); - area.set_area(area.get_area() + fRRAT->area.get_area()); - } - // Freelist of renaming unit always RAM based and needed for RAM-based - // RATs. Although it can be implemented within the CAM-based RAT, Current - // McPAT does not have the free bits in the CAM but use the same external - // free list as a close approximation for CAM RAT. Recycle happens at two - // places: 1)when DCL check there are WAW, the Phy-registers/ROB directly - // recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - // therefore num_wr port = decode-1(-1 means at least one phy reg will be - // used for the current renaming group) + commit width - data = int(ceil(coredynp.phy_ireg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = - coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; - // every cycle, (coredynp.decodeW -1) inst may need to send back it dest - // tags, committW insts needs to update freelist 
buffers - interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, - "Int Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area() + - ifreeL->local_result.area); - area.set_area(area.get_area() + ifreeL->area.get_area()); - - // freelist for FP - data = int(ceil(coredynp.phy_freg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * coredynp.num_ffreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = - coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, - "FP Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area() + - ffreeL->local_result.area); - area.set_area(area.get_area() + ffreeL->area.get_area()); - - idcl = new dep_resource_conflict_check( - &interface_ip, - coredynp, - coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( - &interface_ip, coredynp, coredynp.phy_freg_width); - - } else if (coredynp.scheu_ty == ReservationStation) { - if (coredynp.rm_ty == RAMbased) { - - data = int(ceil(coredynp.phy_ireg_width * - (1 + coredynp.globalCheckpoint) / 8.0)); - out_w = int(ceil(coredynp.phy_ireg_width / - 8.0)); // GC does not need to be readout - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - 
interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * - XML->sys.core[ithCore].archi_Regs_IRF_size * - XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // the extra one port is for GCs - interface_ip.num_rd_ports = 2 * coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iFRAT->local_result.adjust_area(); - // iFRAT->local_result.power.readOp.dynamic *= - // 1+0.2*0.05;//1+mis-speculation% TODO - // iFRAT->local_result.power.writeOp.dynamic - //*=1+0.2*0.05;//compensate for GC - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - - // FP - data = int(ceil(coredynp.phy_freg_width * - (1 + coredynp.globalCheckpoint) / 8.0)); - out_w = int(ceil(coredynp.phy_freg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * - XML->sys.core[ithCore].archi_Regs_FRF_size * - XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - 
interface_ip.num_rw_ports = 1; // the extra one port is for GCs - interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fFRAT->local_result.adjust_area(); - // fFRAT->local_result.power.readOp.dynamic *= - // 1+0.2*0.05;//1+mis-speculation% TODO - // fFRAT->local_result.power.writeOp.dynamic - //*=1+0.2*0.05;//compensate for GC - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); - - } else if (coredynp.rm_ty == CAMbased) { - // FRAT - tag = coredynp.arch_ireg_width + coredynp.hthread_width; - data = int(ceil( - (coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / 8.0)); - out_w = int(ceil(coredynp.arch_ireg_width / - 8.0)); // GC bits does not need to be sent out - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + 
iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - - // FRAT - tag = coredynp.arch_freg_width + coredynp.hthread_width; - data = int( - ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / - 8.0)); // the address of CAM needed to be sent out - out_w = int(ceil(coredynp.arch_freg_width / 8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w * 8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // for GCs - interface_ip.num_rd_ports = - XML->sys.core[ithCore].decode_width; // 0;TODO; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); - } - // Although no RRAT for RS based OOO is really needed since the archiRF - // always holds the non-speculative data, having the RRAT or GC (not both) - // can help the recovery of mis-speculations. 
- - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - data = int(ceil(coredynp.phy_ireg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * - XML->sys.core[ithCore] - .number_hardware_threads; // HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, - "Int RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); - area.set_area(area.get_area() + iRRAT->area.get_area()); - - // RRAT for FP - data = int(ceil(coredynp.phy_freg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = - data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * - XML->sys.core[ithCore] - .number_hardware_threads; // HACK--2 to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 
1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, - "FP RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); - area.set_area(area.get_area() + fRRAT->area.get_area()); - } - - // Freelist of renaming unit of RS based OOO is unifed for both int and fp - // renaming unit since the ROB is unified - data = int(ceil(coredynp.phy_ireg_width / 8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz * 8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0 / clockRate; - interface_ip.latency = 1.0 / clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; // TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = - coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; - interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, - "Unified Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - // ifreeL->area.set_area(ifreeL->area.get_area()+ - // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area() + ifreeL->area.get_area()); - - idcl = new dep_resource_conflict_check( - &interface_ip, - coredynp, - coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( - &interface_ip, coredynp, coredynp.phy_freg_width); - } - } - if (coredynp.core_ty == Inorder && coredynp.issueW > 1) { - 
/* Dependency check logic will only present when decode(issue) width>1. - * Multiple issue in order processor can do without renaming, but dcl is a - * must. - */ - idcl = new dep_resource_conflict_check( - &interface_ip, - coredynp, - coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( - &interface_ip, coredynp, coredynp.phy_freg_width); - } -} - -Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - ifu(0), lsu(0), mmu(0), exu(0), rnu(0), corepipe(0), undiffCore(0), - l2cache(0) { - /* - * initialize, compute and optimize individual components. - */ - - bool exit_flag = true; - - double pipeline_area_per_unit; - // interface_ip.wire_is_mat_type = 2; - // interface_ip.wire_os_mat_type = 2; - // interface_ip.wt =Global_30; - set_core_param(); - - if (XML->sys.Private_L2) { - l2cache = new SharedCache(XML, ithCore, &interface_ip); - } - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); - lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); - mmu = new MemManU(XML, ithCore, &interface_ip, coredynp, exit_flag); - exu = new EXECU( - XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); - if (coredynp.core_ty == OOO) { - rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp); - } - corepipe = new Pipeline(&interface_ip, coredynp); - - if (coredynp.core_ty == OOO) { - pipeline_area_per_unit = - (corepipe->area.get_area() * coredynp.num_pipelines) / 5.0; - if (rnu->exist) { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); - } - } else { - pipeline_area_per_unit = - (corepipe->area.get_area() * coredynp.num_pipelines) / 4.0; - } - - // area.set_area(area.get_area()+ 
corepipe->area.get_area()); - if (ifu->exist) { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); - } - if (lsu->exist) { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); - } - if (exu->exist) { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + exu->area.get_area()); - } - if (mmu->exist) { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + mmu->area.get_area()); - } - - if (coredynp.core_ty == OOO) { - if (rnu->exist) { - - area.set_area(area.get_area() + rnu->area.get_area()); - } - } - - if (undiffCore->exist) { - area.set_area(area.get_area() + undiffCore->area.get_area()); - } - - if (XML->sys.Private_L2) { - area.set_area(area.get_area() + l2cache->area.get_area()); - } - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
- // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); -} - -void BranchPredictor::computeEnergy(bool is_tdp) { - if (!exist) - return; - double r_access; - double w_access; - if (is_tdp) { - r_access = coredynp.predictionW * coredynp.BR_duty_cycle; - w_access = 0 * coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; - } else { - // The resolution of BPT accesses is coarse, but this is - // because most simulators cannot track finer grained details - r_access = XML->sys.core[ithCore].branch_instructions; - w_access = - XML->sys.core[ithCore].branch_mispredictions + - 0.1 * XML->sys.core[ithCore] - .branch_instructions; // 10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - 
chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; - } - - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += - globalBPT->local_result.power.readOp.dynamic * - globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access * - globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += - L1_localBPT->local_result.power.readOp.dynamic * - L1_localBPT->stats_t.readAc.access + - L1_localBPT->stats_t.writeAc.access * - L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += - L2_localBPT->local_result.power.readOp.dynamic * - L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access * - L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += - chooser->local_result.power.readOp.dynamic * - chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access * - chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += - RAS->local_result.power.readOp.dynamic * RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access * RAS->local_result.power.writeOp.dynamic; - - if (is_tdp) { - globalBPT->power = - globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; - L1_localBPT->power = - L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; - L2_localBPT->power = - L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power * pppm_lkg; - RAS->power = - RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; - - power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + - 
chooser->power + RAS->power; - } else { - globalBPT->rt_power = - globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; - L1_localBPT->rt_power = - L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; - L2_localBPT->rt_power = - L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; - chooser->rt_power = - chooser->power_t + chooser->local_result.power * pppm_lkg; - RAS->rt_power = - RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + - L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power; - } -} - -void BranchPredictor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - if (is_tdp) { - cout << indent_str << "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << globalBPT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? globalBPT->power.readOp.longer_channel_leakage - : globalBPT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
globalBPT->power.readOp - .power_gated_with_long_channel_leakage - : globalBPT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << globalBPT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "Local Predictor:" << endl; - cout << indent_str << "L1_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? L1_localBPT->power.readOp.longer_channel_leakage - : L1_localBPT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? L1_localBPT->power.readOp - .power_gated_with_long_channel_leakage - : L1_localBPT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << L1_localBPT->rt_power.readOp.dynamic / executionTime << " W" - << endl; - cout << endl; - cout << indent_str << "L2_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? L2_localBPT->power.readOp.longer_channel_leakage - : L2_localBPT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
L2_localBPT->power.readOp - .power_gated_with_long_channel_leakage - : L2_localBPT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << L2_localBPT->rt_power.readOp.dynamic / executionTime << " W" - << endl; - cout << endl; - - cout << indent_str << "Chooser:" << endl; - cout << indent_str_next << "Area = " << chooser->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << chooser->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? chooser->power.readOp.longer_channel_leakage - : chooser->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? chooser->power.readOp.power_gated_with_long_channel_leakage - : chooser->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << chooser->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "RAS:" << endl; - cout << indent_str_next << "Area = " << RAS->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << RAS->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? RAS->power.readOp.longer_channel_leakage - : RAS->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
RAS->power.readOp.power_gated_with_long_channel_leakage - : RAS->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - } else { - // cout << indent_str_next << "Global Predictor Peak Dynamic = " - //<< globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; - // cout << indent_str_next << "Global Predictor Subthreshold Leakage = " - // << globalBPT->rt_power.readOp.leakage <<" W" << endl; cout << - // indent_str_next - //<< "Global Predictor Gate Leakage = " << - // globalBPT->rt_power.readOp.gate_leakage << " W" << endl; cout - // << indent_str_next << "Local Predictor Peak Dynamic = " << - // L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Local Predictor Subthreshold Leakage = " << - // L1_localBPT->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next << "Local Predictor Gate Leakage = " << - // L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; cout - // << indent_str_next << "Chooser Peak Dynamic = " << - // chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Chooser Subthreshold Leakage = " << - // chooser->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - //<< "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << - //" W" << endl; cout << indent_str_next << "RAS Peak Dynamic = " - //<< RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; - // cout << indent_str_next << "RAS Subthreshold Leakage = " << - // RAS->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" - //<< endl; - } -} - -void InstFetchU::computeEnergy(bool is_tdp) { - if (!exist) - return; - if (is_tdp) { - // 
init stats for Peak - icache.caches->stats_t.readAc.access = - icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - - icache.caches->stats_t.readAc.miss; - icache.caches->tdp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = - icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = - icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.missb->tdp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.ifb->tdp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = - icache.prefetchb->stats_t.readAc.hit = - icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = - XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; - - if (coredynp.predictionW > 0) { - BTB->stats_t.readAc.access = - coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = - 0; // XML->sys.core[ithCore].BTB.write_accesses; - } - - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; - - 
} else { - // init stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = - XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = - XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = - icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = - icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = - XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; - - if (coredynp.predictionW > 0) { - BTB->stats_t.readAc.access = - XML->sys.core[ithCore] - .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = - XML->sys.core[ithCore] - .BTB - .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; - } - - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = - XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; - } - - icache.power_t.reset(); - IB->power_t.reset(); - // ID_inst->power_t.reset(); - // ID_operand->power_t.reset(); - // 
ID_misc->power_t.reset(); - if (coredynp.predictionW > 0) { - BTB->power_t.reset(); - } - - icache.power_t.readOp.dynamic += - (icache.caches->stats_t.readAc.hit * - icache.caches->local_result.power.readOp.dynamic + - // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss * - icache.caches->local_result.power.readOp - .dynamic + // assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss * - icache.caches->local_result.power.writeOp - .dynamic); // read miss in Icache cause a write to Icache - icache.power_t.readOp.dynamic += - icache.missb->stats_t.readAc.access * - icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access * - icache.missb->local_result.power.writeOp - .dynamic; // each access to missb involves a CAM and a write - icache.power_t.readOp.dynamic += - icache.ifb->stats_t.readAc.access * - icache.ifb->local_result.power.searchOp.dynamic + - icache.ifb->stats_t.writeAc.access * - icache.ifb->local_result.power.writeOp.dynamic; - icache.power_t.readOp.dynamic += - icache.prefetchb->stats_t.readAc.access * - icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access * - icache.prefetchb->local_result.power.writeOp.dynamic; - - IB->power_t.readOp.dynamic += - IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + - IB->stats_t.writeAc.access * IB->local_result.power.writeOp.dynamic; - - if (coredynp.predictionW > 0) { - BTB->power_t.readOp.dynamic += - BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; - - BPT->computeEnergy(is_tdp); - } - - if (is_tdp) { - // icache.power = icache.power_t + - // (icache.caches->local_result.power)*pppm_lkg + - // (icache.missb->local_result.power + - // icache.ifb->local_result.power + - // icache.prefetchb->local_result.power)*pppm_Isub; - 
icache.power = icache.power_t + (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power) * - pppm_lkg; - - IB->power = IB->power_t + IB->local_result.power * pppm_lkg; - power = power + icache.power + IB->power; - if (coredynp.predictionW > 0) { - BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; - power = power + BTB->power + BPT->power; - } - - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; - - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; - - power = power + (ID_inst->power + ID_operand->power + ID_misc->power); - } else { - // icache.rt_power = icache.power_t + - // (icache.caches->local_result.power)*pppm_lkg + - // (icache.missb->local_result.power + - // icache.ifb->local_result.power + - // icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power) * - pppm_lkg; - - IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; - if (coredynp.predictionW > 0) { - BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; - } - - ID_inst->rt_power.readOp.dynamic = - ID_inst->power_t.readOp.dynamic * ID_inst->rtp_stats.readAc.access; - ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * - ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = - ID_misc->power_t.readOp.dynamic * 
ID_misc->rtp_stats.readAc.access; - - rt_power = rt_power + - (ID_inst->rt_power + ID_operand->rt_power + ID_misc->rt_power); - } -} - -void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - - cout << indent_str << "Instruction Cache:" << endl; - cout << indent_str_next << "Area = " << icache.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << icache.power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? icache.power.readOp.longer_channel_leakage - : icache.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? icache.power.readOp.power_gated_with_long_channel_leakage - : icache.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << icache.rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (coredynp.predictionW > 0) { - cout << indent_str << "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? BTB->power.readOp.longer_channel_leakage - : BTB->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
BTB->power.readOp.power_gated_with_long_channel_leakage - : BTB->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (BPT->exist) { - cout << indent_str << "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? BPT->power.readOp.longer_channel_leakage - : BPT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? BPT->power.readOp.power_gated_with_long_channel_leakage - : BPT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 3) { - BPT->displayEnergy(indent + 4, plevel, is_tdp); - } - } - } - cout << indent_str << "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? IB->power.readOp.longer_channel_leakage - : IB->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
IB->power.readOp.power_gated_with_long_channel_leakage - : IB->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - cout << indent_str << "Instruction Decoder:" << endl; - cout << indent_str_next << "Area = " - << (ID_inst->area.get_area() + ID_operand->area.get_area() + - ID_misc->area.get_area()) * - coredynp.decodeW * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << (ID_inst->power.readOp.dynamic + ID_operand->power.readOp.dynamic + - ID_misc->power.readOp.dynamic) * - clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? (ID_inst->power.readOp.longer_channel_leakage + - ID_operand->power.readOp.longer_channel_leakage + - ID_misc->power.readOp.longer_channel_leakage) - : (ID_inst->power.readOp.leakage + - ID_operand->power.readOp.leakage + - ID_misc->power.readOp.leakage)) - << " W" << endl; - - double tot_leakage = - (ID_inst->power.readOp.leakage + ID_operand->power.readOp.leakage + - ID_misc->power.readOp.leakage); - double tot_leakage_longchannel = - (ID_inst->power.readOp.longer_channel_leakage + - ID_operand->power.readOp.longer_channel_leakage + - ID_misc->power.readOp.longer_channel_leakage); - double tot_leakage_pg = (ID_inst->power.readOp.power_gated_leakage + - ID_operand->power.readOp.power_gated_leakage + - ID_misc->power.readOp.power_gated_leakage); - double tot_leakage_pg_with_long_channel = - (ID_inst->power.readOp.power_gated_with_long_channel_leakage + - ID_operand->power.readOp.power_gated_with_long_channel_leakage + - ID_misc->power.readOp.power_gated_with_long_channel_leakage); - - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
tot_leakage_pg_with_long_channel : tot_leakage_pg) - << " W" << endl; - cout << indent_str_next << "Gate Leakage = " - << (ID_inst->power.readOp.gate_leakage + - ID_operand->power.readOp.gate_leakage + - ID_misc->power.readOp.gate_leakage) - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << (ID_inst->rt_power.readOp.dynamic + - ID_operand->rt_power.readOp.dynamic + - ID_misc->rt_power.readOp.dynamic) / - executionTime - << " W" << endl; - cout << endl; - } else { - // cout << indent_str_next << "Instruction Cache Peak Dynamic = " - //<< icache.rt_power.readOp.dynamic*clockRate << " W" << endl; - // cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " - // << icache.rt_power.readOp.leakage <<" W" << endl; cout << - // indent_str_next << "Instruction Cache Gate Leakage = " << - // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next << "Instruction Buffer Peak Dynamic = " << - // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << - // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << - // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - // << "Instruction Buffer Gate Leakage = " << - // IB->rt_power.readOp.gate_leakage - //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer - // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << - // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold - // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout - // << indent_str_next << "Branch Target Buffer Gate Leakage = " << - // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next << "Branch Predictor Peak Dynamic = " << - // BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << - // BPT->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "Branch Predictor Gate Leakage = 
" << - // BPT->rt_power.readOp.gate_leakage - //<< " W" << endl; - } -} - -void RENAMINGU::computeEnergy(bool is_tdp) { - if (!exist) - return; - double pppm_t[4] = {1, 1, 1, 1}; - if (is_tdp) { // init stats for Peak - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - } - ifreeL->stats_t.readAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_rd_ports;; - ifreeL->stats_t.writeAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = - coredynp.decodeW; // ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = - coredynp.decodeW; // ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; - } else if (coredynp.scheu_ty == ReservationStation) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - 
iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - } - // Unified free list for both int and fp - ifreeL->stats_t.readAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } else { - if (coredynp.issueW > 1) { - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - } - - } else { // init stats for Runtime Dynamic (RTP) - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = 
iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = - XML->sys.core[ithCore] - .rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = - XML->sys.core[ithCore] - .fp_rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - fRRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - } - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = - 2 * XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = - 2 * XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; - } else if (coredynp.scheu_ty == ReservationStation) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - // iFRAT->stats_t.searchAc.access = - // XML->sys.core[ithCore].committed_int_instructions;//hack: not all - // committed instructions use regs. 
- iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - // fFRAT->stats_t.searchAc.access = - // XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = - XML->sys.core[ithCore] - .rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = - XML->sys.core[ithCore] - .fp_rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - fRRAT->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - } - // Unified free list for both int and fp since the ROB act as physcial - // registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = - 2 * (XML->sys.core[ithCore].rename_writes + - XML->sys.core[ithCore] - .fp_rename_writes); // HACK: 2-> since some of renaming in - // the same group are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * - XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3 * coredynp.fp_issueW * - coredynp.fp_issueW * - 
XML->sys.core[ithCore].fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } else { - if (coredynp.issueW > 1) { - idcl->stats_t.readAc.access = - 2 * XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - } - } - /* Compute engine */ - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.readOp.dynamic + - idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.readOp.dynamic + - fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.searchOp.dynamic + - idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.searchOp.dynamic + - fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); - } - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += - (iRRAT->stats_t.readAc.access * - iRRAT->local_result.power.readOp.dynamic + - iRRAT->stats_t.writeAc.access * - iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += - (fRRAT->stats_t.readAc.access * - 
fRRAT->local_result.power.readOp.dynamic + - fRRAT->stats_t.writeAc.access * - fRRAT->local_result.power.writeOp.dynamic); - } - - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += - (ifreeL->stats_t.readAc.access * - ifreeL->local_result.power.readOp.dynamic + - ifreeL->stats_t.writeAc.access * - ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += - (ffreeL->stats_t.readAc.access * - ffreeL->local_result.power.readOp.dynamic + - ffreeL->stats_t.writeAc.access * - ffreeL->local_result.power.writeOp.dynamic); - - } else if (coredynp.scheu_ty == ReservationStation) { - if (coredynp.rm_ty == RAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.readOp.dynamic + - idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.readOp.dynamic + - fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); - } else if (coredynp.rm_ty == CAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.searchOp.dynamic + - idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.searchOp.dynamic + - fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); - } - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += - (iRRAT->stats_t.readAc.access * - iRRAT->local_result.power.readOp.dynamic + - 
iRRAT->stats_t.writeAc.access * - iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += - (fRRAT->stats_t.readAc.access * - fRRAT->local_result.power.readOp.dynamic + - fRRAT->stats_t.writeAc.access * - fRRAT->local_result.power.writeOp.dynamic); - } - - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += - (ifreeL->stats_t.readAc.access * - ifreeL->local_result.power.readOp.dynamic + - ifreeL->stats_t.writeAc.access * - ifreeL->local_result.power.writeOp.dynamic); - } - - } else { - if (coredynp.issueW > 1) { - idcl->power_t.reset(); - fdcl->power_t.reset(); - set_pppm(pppm_t, - idcl->stats_t.readAc.access, - coredynp.num_hthreads, - coredynp.num_hthreads, - idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, - fdcl->stats_t.readAc.access, - coredynp.num_hthreads, - coredynp.num_hthreads, - idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; - } - } - - // assign value to tpd and rtp - if (is_tdp) { - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT->power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; - ffreeL->power = ffreeL->power_t + ffreeL->local_result.power; - power = power + - (iFRAT->power + fFRAT->power) - //+ (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; - power = power + (iRRAT->power + fRRAT->power); - } - } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT->power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + 
ifreeL->local_result.power; - power = power + (iFRAT->power + fFRAT->power) + ifreeL->power; - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; - power = power + (iRRAT->power + fRRAT->power); - } - } - } else { - power = power + idcl->power_t + fdcl->power_t; - } - - } else { - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT->rt_power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->rt_power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power; - rt_power = rt_power + - (iFRAT->rt_power + fFRAT->rt_power) - // + (iRRAT->rt_power + - // fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); - } - } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT->rt_power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->rt_power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; - rt_power = - rt_power + (iFRAT->rt_power + fFRAT->rt_power) + ifreeL->rt_power; - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); - } - } - } else { - rt_power = rt_power + idcl->power_t + fdcl->power_t; - } - } -} - -void RENAMINGU::displayEnergy(uint32_t indent, int 
plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - - if (coredynp.core_ty == OOO) { - cout << indent_str << "Int Front End RAT with " - << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << iFRAT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << iFRAT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iFRAT->power.readOp.longer_channel_leakage - : iFRAT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? iFRAT->power.readOp.power_gated_with_long_channel_leakage - : iFRAT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << iFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "FP Front End RAT with " - << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << fFRAT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << fFRAT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fFRAT->power.readOp.longer_channel_leakage - : fFRAT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
fFRAT->power.readOp.power_gated_with_long_channel_leakage - : fFRAT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << fFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "Free List:" << endl; - cout << indent_str_next << "Area = " << ifreeL->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << ifreeL->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ifreeL->power.readOp.longer_channel_leakage - : ifreeL->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? ifreeL->power.readOp.power_gated_with_long_channel_leakage - : ifreeL->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << ifreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - cout << indent_str << "Int Retire RAT: " << endl; - cout << indent_str_next << "Area = " << iRRAT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << iRRAT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iRRAT->power.readOp.longer_channel_leakage - : iRRAT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
iRRAT->power.readOp - .power_gated_with_long_channel_leakage - : iRRAT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << iRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "FP Retire RAT:" << endl; - cout << indent_str_next << "Area = " << fRRAT->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << fRRAT->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fRRAT->power.readOp.longer_channel_leakage - : fRRAT->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? fRRAT->power.readOp - .power_gated_with_long_channel_leakage - : fRRAT->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << fRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - if (coredynp.scheu_ty == PhysicalRegFile) { - cout << indent_str << "FP Free List:" << endl; - cout << indent_str_next << "Area = " << ffreeL->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << ffreeL->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ffreeL->power.readOp.longer_channel_leakage - : ffreeL->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
ffreeL->power.readOp - .power_gated_with_long_channel_leakage - : ffreeL->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << ffreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - } else { - cout << indent_str << "Int DCL:" << endl; - cout << indent_str_next - << "Peak Dynamic = " << idcl->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? idcl->power.readOp.longer_channel_leakage - : idcl->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? idcl->power.readOp.power_gated_with_long_channel_leakage - : idcl->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << idcl->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << indent_str << "FP DCL:" << endl; - cout << indent_str_next - << "Peak Dynamic = " << fdcl->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fdcl->power.readOp.longer_channel_leakage - : fdcl->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
fdcl->power.readOp.power_gated_with_long_channel_leakage - : fdcl->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << fdcl->rt_power.readOp.dynamic / executionTime << " W" << endl; - } - } else { - if (coredynp.core_ty == OOO) { - cout << indent_str_next << "Int Front End RAT Peak Dynamic = " - << iFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " - << iFRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Int Front End RAT Gate Leakage = " - << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Front End RAT Peak Dynamic = " - << fFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " - << fFRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Front End RAT Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Free List Peak Dynamic = " - << ifreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Free List Subthreshold Leakage = " - << ifreeL->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Free List Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.scheu_ty == PhysicalRegFile) { - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - cout << indent_str_next << "Int Retire RAT Peak Dynamic = " - << iRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " - << iRRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Int Retire RAT Gate Leakage = " - << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; - cout << 
indent_str_next << "FP Retire RAT Peak Dynamic = " - << fRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " - << fRRAT->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Retire RAT Gate Leakage = " - << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; - } - cout << indent_str_next << "FP Free List Peak Dynamic = " - << ffreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "FP Free List Subthreshold Leakage = " - << ffreeL->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Free List Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; - } - } else { - cout << indent_str_next << "Int DCL Peak Dynamic = " - << idcl->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Int DCL Subthreshold Leakage = " - << idcl->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next - << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "FP DCL Peak Dynamic = " - << fdcl->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "FP DCL Subthreshold Leakage = " - << fdcl->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next - << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage - << " W" << endl; - } - } -} - -void SchedulerU::computeEnergy(bool is_tdp) { - if (!exist) - return; - double ROB_duty_cycle; - // ROB_duty_cycle = ((coredynp.ALU_duty_cycle + - // coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 - // + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 - //? 
(coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 - // + - // coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; - ROB_duty_cycle = 1; - // init stats - if (is_tdp) { - if (coredynp.core_ty == OOO) { - int_inst_window->stats_t.readAc.access = - coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = - coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = - coredynp.issueW * coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = - fp_inst_window->l_ip.num_rd_ports * coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = - fp_inst_window->l_ip.num_wr_ports * coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = - fp_inst_window->l_ip.num_search_ports * coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->stats_t.readAc.access = - coredynp.commitW * coredynp.num_pipelines * ROB_duty_cycle; - ROB->stats_t.writeAc.access = - coredynp.issueW * coredynp.num_pipelines * ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; - - /* - * When inst commits, ROB must be read. - * Because for Physcial register based cores, physical register tag in - * ROB need to be read out and write into RRAT/CAM based RAT. For RS - * based cores, register content that stored in ROB must be read out and - * stored in architectural registers. - * - * if no-register is involved, the ROB read out operation when - * instruction commits can be ignored. assuming 20% insts. belong this - * type. 
- * TODO: ROB duty_cycle need to be revisited - */ - } - - } else if (coredynp.multithreaded) { - int_inst_window->stats_t.readAc.access = - coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = - coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = - coredynp.issueW * coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - } - - } else { // rtp - if (coredynp.core_ty == OOO) { - int_inst_window->stats_t.readAc.access = - XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = - XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = - XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = - XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = - XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = - XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size > 0) { - - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; - ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; - /* ROB need to be updated in RS based OOO when new values are produced, - * this update may happen before the commit stage when ROB entry is - * released - * 1. ROB write at instruction inserted in - * 2. ROB write as results produced (for RS based OOO only) - * 3. ROB read as instruction committed. 
For RS based OOO, data values - * are read out and sent to ARF For Physical reg based OOO, no data - * stored in ROB, but register tags need to be read out and used to set - * the RRAT and to recycle the register tag to free list buffer - */ - ROB->rtp_stats = ROB->stats_t; - } - - } else if (coredynp.multithreaded) { - int_inst_window->stats_t.readAc.access = - XML->sys.core[ithCore].int_instructions + - XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = - XML->sys.core[ithCore].int_instructions + - XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = - 2 * (XML->sys.core[ithCore].int_instructions + - XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; - } - } - - // computation engine - if (coredynp.core_ty == OOO) { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); - - /* each instruction needs to write to scheduler, read out when all resources - * and source operands are ready two search ops with one for each source - * operand - * - */ - int_inst_window->power_t.readOp.dynamic += - int_inst_window->local_result.power.readOp.dynamic * - int_inst_window->stats_t.readAc.access + - int_inst_window->local_result.power.searchOp.dynamic * - int_inst_window->stats_t.searchAc.access + - int_inst_window->local_result.power.writeOp.dynamic * - int_inst_window->stats_t.writeAc.access + - int_inst_window->stats_t.readAc.access * - instruction_selection->power.readOp.dynamic; - - fp_inst_window->power_t.readOp.dynamic += - fp_inst_window->local_result.power.readOp.dynamic * - fp_inst_window->stats_t.readAc.access + - fp_inst_window->local_result.power.searchOp.dynamic * - fp_inst_window->stats_t.searchAc.access + - fp_inst_window->local_result.power.writeOp.dynamic * - fp_inst_window->stats_t.writeAc.access + - fp_inst_window->stats_t.writeAc.access * - instruction_selection->power.readOp.dynamic; - - if (XML->sys.core[ithCore].ROB_size > 0) { 
- ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += - ROB->local_result.power.readOp.dynamic * ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access * ROB->local_result.power.writeOp.dynamic; - } - - } else if (coredynp.multithreaded) { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += - int_inst_window->local_result.power.readOp.dynamic * - int_inst_window->stats_t.readAc.access + - int_inst_window->local_result.power.searchOp.dynamic * - int_inst_window->stats_t.searchAc.access + - int_inst_window->local_result.power.writeOp.dynamic * - int_inst_window->stats_t.writeAc.access + - int_inst_window->stats_t.writeAc.access * - instruction_selection->power.readOp.dynamic; - } - - // assign values - if (is_tdp) { - if (coredynp.core_ty == OOO) { - int_inst_window->power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - fp_inst_window->power = - fp_inst_window->power_t + - (fp_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; - if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->power = ROB->power_t + ROB->local_result.power * pppm_lkg; - power = power + ROB->power; - } - - } else if (coredynp.multithreaded) { - // set_pppm(pppm_t, - // XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - power = power + int_inst_window->power; - } - - } else { // rtp - if (coredynp.core_ty == OOO) { - int_inst_window->rt_power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - fp_inst_window->rt_power = - fp_inst_window->power_t + - (fp_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - rt_power = - rt_power + int_inst_window->rt_power + 
fp_inst_window->rt_power; - if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->rt_power = ROB->power_t + ROB->local_result.power * pppm_lkg; - rt_power = rt_power + ROB->rt_power; - } - - } else if (coredynp.multithreaded) { - // set_pppm(pppm_t, - // XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * - pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; - } - } - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - // cout<<"Scheduler - // power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - if (coredynp.core_ty == OOO) { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next - << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? int_inst_window->power.readOp.longer_channel_leakage - : int_inst_window->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
int_inst_window->power.readOp - .power_gated_with_long_channel_leakage - : int_inst_window->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" - << endl; - cout << endl; - cout << indent_str << "FP Instruction Window:" << endl; - cout << indent_str_next - << "Area = " << fp_inst_window->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << fp_inst_window->power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? fp_inst_window->power.readOp.longer_channel_leakage - : fp_inst_window->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? fp_inst_window->power.readOp - .power_gated_with_long_channel_leakage - : fp_inst_window->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << fp_inst_window->rt_power.readOp.dynamic / executionTime << " W" - << endl; - cout << endl; - if (XML->sys.core[ithCore].ROB_size > 0) { - cout << indent_str << "ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << ROB->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ROB->power.readOp.longer_channel_leakage - : ROB->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
ROB->power.readOp.power_gated_with_long_channel_leakage - : ROB->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << ROB->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - } else if (coredynp.multithreaded) { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next - << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next << "Peak Dynamic = " - << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? int_inst_window->power.readOp.longer_channel_leakage - : int_inst_window->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? int_inst_window->power.readOp - .power_gated_with_long_channel_leakage - : int_inst_window->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" - << endl; - cout << endl; - } - } else { - if (coredynp.core_ty == OOO) { - cout << indent_str_next << "Instruction Window Peak Dynamic = " - << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " - << int_inst_window->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " - << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Peak Dynamic = " - << fp_inst_window->rt_power.readOp.dynamic * clockRate << " W" - << endl; - 
cout << indent_str_next - << "FP Instruction Window Subthreshold Leakage = " - << fp_inst_window->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Gate Leakage = " - << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - if (XML->sys.core[ithCore].ROB_size > 0) { - cout << indent_str_next << "ROB Peak Dynamic = " - << ROB->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next - << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage - << " W" << endl; - } - } else if (coredynp.multithreaded) { - cout << indent_str_next << "Instruction Window Peak Dynamic = " - << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " - << int_inst_window->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " - << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - } - } -} - -void LoadStoreU::computeEnergy(bool is_tdp) { - if (!exist) - return; - if (is_tdp) { - // init stats for Peak - dcache.caches->stats_t.readAc.access = - 0.67 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = - 0.33 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = - dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.missb->stats_t.writeAc.access = - 
dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.ifb->stats_t.writeAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.prefetchb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.prefetchb->stats_t.writeAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; - if (cache_p == Write_back) { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; - } - - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = - LSQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; - if ((coredynp.core_ty == OOO) && - (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = - LoadQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; - } - } else { - // init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = - XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = - XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = - XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = - XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; - - if (cache_p == Write_back) { 
- dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; - } else { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = - dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - } - - LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + - XML->sys.core[ithCore].store_instructions) * - 2; // flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + - XML->sys.core[ithCore].store_instructions) * - 2; - LSQ->rtp_stats = LSQ->stats_t; - - if ((coredynp.core_ty == OOO) && - (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + - 
XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + - XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; - } - } - - dcache.power_t.reset(); - LSQ->power_t.reset(); - dcache.power_t.readOp.dynamic += - (dcache.caches->stats_t.readAc.hit * - dcache.caches->local_result.power.readOp.dynamic + - dcache.caches->stats_t.readAc.miss * - dcache.caches->local_result.power.readOp - .dynamic + // assuming D cache is in the fast model which read - // tag and data together - dcache.caches->stats_t.writeAc.miss * - dcache.caches->local_result.tag_array2->power.readOp.dynamic + - dcache.caches->stats_t.writeAc.access * - dcache.caches->local_result.power.writeOp.dynamic); - - if (cache_p == Write_back) { // write miss will generate a write later - dcache.power_t.readOp.dynamic += - dcache.caches->stats_t.writeAc.miss * - dcache.caches->local_result.power.writeOp.dynamic; - } - - dcache.power_t.readOp.dynamic += - dcache.missb->stats_t.readAc.access * - dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access * - dcache.missb->local_result.power.writeOp - .dynamic; // each access to missb involves a CAM and a write - dcache.power_t.readOp.dynamic += - dcache.ifb->stats_t.readAc.access * - dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access * - dcache.ifb->local_result.power.writeOp.dynamic; - dcache.power_t.readOp.dynamic += - dcache.prefetchb->stats_t.readAc.access * - dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access * - dcache.prefetchb->local_result.power.writeOp.dynamic; - if (cache_p == Write_back) { - dcache.power_t.readOp.dynamic += - dcache.wbb->stats_t.readAc.access * - dcache.wbb->local_result.power.searchOp.dynamic + - dcache.wbb->stats_t.writeAc.access * - dcache.wbb->local_result.power.writeOp.dynamic; - } - - if ((coredynp.core_ty == OOO) && - 
(XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += - LoadQ->stats_t.readAc.access * - (LoadQ->local_result.power.searchOp.dynamic + - LoadQ->local_result.power.readOp.dynamic) + - LoadQ->stats_t.writeAc.access * - LoadQ->local_result.power.writeOp - .dynamic; // every memory access invloves at least two - // operations on LoadQ - - LSQ->power_t.readOp.dynamic += - LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + - LSQ->local_result.power.readOp.dynamic) + - LSQ->stats_t.writeAc.access * - LSQ->local_result.power.writeOp - .dynamic; // every memory access invloves at least two - // operations on LSQ - - } else { - LSQ->power_t.readOp.dynamic += - LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + - LSQ->local_result.power.readOp.dynamic) + - LSQ->stats_t.writeAc.access * - LSQ->local_result.power.writeOp - .dynamic; // every memory access invloves at least two - // operations on LSQ - } - - if (is_tdp) { - // dcache.power = dcache.power_t + - // (dcache.caches->local_result.power)*pppm_lkg + - // (dcache.missb->local_result.power + - // dcache.ifb->local_result.power + - // dcache.prefetchb->local_result.power + - // dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) * - pppm_lkg; - if (cache_p == Write_back) { - dcache.power = dcache.power + dcache.wbb->local_result.power * pppm_lkg; - } - - LSQ->power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; - power = power + dcache.power + LSQ->power; - - if ((coredynp.core_ty == OOO) && - (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; - power = power + LoadQ->power; - } - } else { - // dcache.rt_power = dcache.power_t + - // (dcache.caches->local_result.power + - // 
dcache.missb->local_result.power - // + dcache.ifb->local_result.power + - // dcache.prefetchb->local_result.power + - // dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) * - pppm_lkg; - - if (cache_p == Write_back) { - dcache.rt_power = - dcache.rt_power + dcache.wbb->local_result.power * pppm_lkg; - } - - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; - rt_power = rt_power + dcache.rt_power + LSQ->rt_power; - - if ((coredynp.core_ty == OOO) && - (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; - } - } -} - -void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << indent_str << "Data Cache:" << endl; - cout << indent_str_next << "Area = " << dcache.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << dcache.power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? dcache.power.readOp.longer_channel_leakage - : dcache.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
dcache.power.readOp.power_gated_with_long_channel_leakage - : dcache.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << dcache.rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (coredynp.core_ty == Inorder) { - cout << indent_str << "Load/Store Queue:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LSQ->power.readOp.longer_channel_leakage - : LSQ->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? LSQ->power.readOp.power_gated_with_long_channel_leakage - : LSQ->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } else - - { - if (XML->sys.core[ithCore].load_buffer_size > 0) { - cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << LoadQ->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LoadQ->power.readOp.longer_channel_leakage - : LoadQ->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? 
LoadQ->power.readOp - .power_gated_with_long_channel_leakage - : LoadQ->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << LoadQ->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - cout << indent_str << "StoreQ:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LSQ->power.readOp.longer_channel_leakage - : LSQ->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? LSQ->power.readOp.power_gated_with_long_channel_leakage - : LSQ->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - } else { - cout << indent_str_next << "Data Cache Peak Dynamic = " - << dcache.rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Data Cache Subthreshold Leakage = " - << dcache.rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Data Cache Gate Leakage = " - << dcache.rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.core_ty == Inorder) { - cout << indent_str_next << "Load/Store Queue Peak Dynamic = " - << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " - << LSQ->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Load/Store Queue Gate Leakage = " - << 
LSQ->rt_power.readOp.gate_leakage << " W" << endl; - } else { - cout << indent_str_next << "LoadQ Peak Dynamic = " - << LoadQ->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "LoadQ Subthreshold Leakage = " - << LoadQ->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next - << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "StoreQ Peak Dynamic = " - << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next - << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage - << " W" << endl; - } - } -} - -void MemManU::computeEnergy(bool is_tdp) { - - if (!exist) - return; - if (is_tdp) { - // init stats for Peak - itlb->stats_t.readAc.access = - itlb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = - itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = - dtlb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = - dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; - } else { - // init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = - itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - dtlb->stats_t.readAc.hit = - dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; - } - - itlb->power_t.reset(); - 
dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += - itlb->stats_t.readAc.access * itlb->local_result.power.searchOp - .dynamic // FA spent most power in tag, - // so use total access not hits - + itlb->stats_t.readAc.miss * itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += - dtlb->stats_t.readAc.access * dtlb->local_result.power.searchOp - .dynamic // FA spent most power in tag, - // so use total access not hits - + dtlb->stats_t.readAc.miss * dtlb->local_result.power.writeOp.dynamic; - - if (is_tdp) { - itlb->power = itlb->power_t + itlb->local_result.power * pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; - power = power + itlb->power + dtlb->power; - } else { - itlb->rt_power = itlb->power_t + itlb->local_result.power * pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; - } -} - -void MemManU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << itlb->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? itlb->power.readOp.longer_channel_leakage - : itlb->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
itlb->power.readOp.power_gated_with_long_channel_leakage - : itlb->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << itlb->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - cout << indent_str << "Dtlb:" << endl; - cout << indent_str_next << "Area = " << dtlb->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << dtlb->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? dtlb->power.readOp.longer_channel_leakage - : dtlb->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? dtlb->power.readOp.power_gated_with_long_channel_leakage - : dtlb->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << dtlb->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } else { - cout << indent_str_next << "Itlb Peak Dynamic = " - << itlb->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next - << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Dtlb Peak Dynamic = " - << dtlb->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next - << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage - << " W" << endl; - cout << indent_str_next - << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage - << " W" << endl; - } -} - -void RegFU::computeEnergy(bool is_tdp) { - /* - * 
Architecture RF and physical RF cannot be present at the same time. - * Therefore, the RF stats can only refer to either ARF or PRF; - * And the same stats can be used for both. - */ - if (!exist) - return; - if (is_tdp) { - // init stats for Peak - IRF->stats_t.readAc.access = - coredynp.issueW * 2 * - (coredynp.ALU_duty_cycle * 1.1 + - (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * - coredynp.num_pipelines; - IRF->stats_t.writeAc.access = - coredynp.issueW * - (coredynp.ALU_duty_cycle * 1.1 + - (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * - coredynp.num_pipelines; - // Rule of Thumb: about 10% RF related instructions do not need to access - // ALUs - IRF->tdp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports * - coredynp.FPU_duty_cycle * 1.05 * - coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports * - coredynp.FPU_duty_cycle * 1.05 * - coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; - if (coredynp.regWindowing) { - RFWIN->stats_t.readAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; - } - } else { - // init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = - XML->sys.core[ithCore] - .int_regfile_reads; // TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; - if (coredynp.regWindowing) { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls * 16; - RFWIN->stats_t.writeAc.access = - XML->sys.core[ithCore].function_calls * 16; - RFWIN->rtp_stats = RFWIN->stats_t; - - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + - 
XML->sys.core[ithCore].function_calls * 16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + - XML->sys.core[ithCore].function_calls * 16; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + - XML->sys.core[ithCore].function_calls * 16; - ; - FRF->stats_t.writeAc.access = - XML->sys.core[ithCore].float_regfile_writes + - XML->sys.core[ithCore].function_calls * 16; - ; - FRF->rtp_stats = FRF->stats_t; - } - } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += - (IRF->stats_t.readAc.access * IRF->local_result.power.readOp.dynamic + - IRF->stats_t.writeAc.access * IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += - (FRF->stats_t.readAc.access * FRF->local_result.power.readOp.dynamic + - FRF->stats_t.writeAc.access * FRF->local_result.power.writeOp.dynamic); - if (coredynp.regWindowing) { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += - (RFWIN->stats_t.readAc.access * - RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access * - RFWIN->local_result.power.writeOp.dynamic); - } - - if (is_tdp) { - IRF->power = IRF->power_t + - ((coredynp.scheu_ty == ReservationStation) - ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) - : IRF->local_result.power); - FRF->power = FRF->power_t + - ((coredynp.scheu_ty == ReservationStation) - ? (FRF->local_result.power * coredynp.pppm_lkg_multhread) - : FRF->local_result.power); - power = power + (IRF->power + FRF->power); - if (coredynp.regWindowing) { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; - power = power + RFWIN->power; - } - } else { - IRF->rt_power = - IRF->power_t + - ((coredynp.scheu_ty == ReservationStation) - ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) - : IRF->local_result.power); - FRF->rt_power = - FRF->power_t + - ((coredynp.scheu_ty == ReservationStation) - ? 
(FRF->local_result.power * coredynp.pppm_lkg_multhread) - : FRF->local_result.power); - rt_power = rt_power + (IRF->power_t + FRF->power_t); - if (coredynp.regWindowing) { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; - } - } -} - -void RegFU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << IRF->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? IRF->power.readOp.longer_channel_leakage - : IRF->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? IRF->power.readOp.power_gated_with_long_channel_leakage - : IRF->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - cout << indent_str << "Floating Point RF:" << endl; - cout << indent_str_next << "Area = " << FRF->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << FRF->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? 
FRF->power.readOp.longer_channel_leakage - : FRF->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? FRF->power.readOp.power_gated_with_long_channel_leakage - : FRF->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - if (coredynp.regWindowing) { - cout << indent_str << "Register Windows:" << endl; - cout << indent_str_next << "Area = " << RFWIN->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << RFWIN->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? RFWIN->power.readOp.longer_channel_leakage - : RFWIN->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
RFWIN->power.readOp.power_gated_with_long_channel_leakage - : RFWIN->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << RFWIN->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } - } else { - cout << indent_str_next << "Integer RF Peak Dynamic = " - << IRF->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Integer RF Subthreshold Leakage = " - << IRF->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next - << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next << "Floating Point RF Peak Dynamic = " - << FRF->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " - << FRF->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Floating Point RF Gate Leakage = " - << FRF->rt_power.readOp.gate_leakage << " W" << endl; - if (coredynp.regWindowing) { - cout << indent_str_next << "Register Windows Peak Dynamic = " - << RFWIN->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Register Windows Subthreshold Leakage = " - << RFWIN->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Register Windows Gate Leakage = " - << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; - } - } -} - -void EXECU::computeEnergy(bool is_tdp) { - if (!exist) - return; - double pppm_t[4] = {1, 1, 1, 1}; - // rfu->power.reset(); - // rfu->rt_power.reset(); - // scheu->power.reset(); - // scheu->rt_power.reset(); - // exeu->power.reset(); - // exeu->rt_power.reset(); - - rfu->computeEnergy(is_tdp); - scheu->computeEnergy(is_tdp); - exeu->computeEnergy(is_tdp); - if (coredynp.num_fpus > 0) { - fp_u->computeEnergy(is_tdp); - } - if (coredynp.num_muls > 0) { - 
mul->computeEnergy(is_tdp); - } - - if (is_tdp) { - set_pppm( - pppm_t, - 2 * coredynp.ALU_cdb_duty_cycle, - 2, - 2, - 2 * coredynp - .ALU_cdb_duty_cycle); // 2 means two source operands needs to be - // passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power * pppm_t + - int_bypass->power * pppm_t; - if (coredynp.num_muls > 0) { - set_pppm( - pppm_t, - 2 * coredynp.MUL_cdb_duty_cycle, - 2, - 2, - 2 * coredynp - .MUL_cdb_duty_cycle); // 2 means two source operands needs to - // be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power * pppm_t + - int_mul_bypass->power * pppm_t; - power = power + mul->power; - } - if (coredynp.num_fpus > 0) { - set_pppm( - pppm_t, - 3 * coredynp.FPU_cdb_duty_cycle, - 3, - 3, - 3 * coredynp - .FPU_cdb_duty_cycle); // 3 means three source operands needs - // to be passed for each fp instruction. - bypass.power = bypass.power + fp_bypass->power * pppm_t + - fpTagBypass->power * pppm_t; - power = power + fp_u->power; - } - - power = power + rfu->power + exeu->power + bypass.power + scheu->power; - } else { - set_pppm(pppm_t, - XML->sys.core[ithCore].cdb_alu_accesses, - 2, - 2, - XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power * pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power * pppm_t; - - if (coredynp.num_muls > 0) { - set_pppm(pppm_t, - XML->sys.core[ithCore].cdb_mul_accesses, - 2, - 2, - XML->sys.core[ithCore] - .cdb_mul_accesses); // 2 means two source operands needs to - // be passed for each int instruction. 
- bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power * pppm_t + - int_mul_bypass->power * pppm_t; - rt_power = rt_power + mul->rt_power; - } - - if (coredynp.num_fpus > 0) { - set_pppm(pppm_t, - XML->sys.core[ithCore].cdb_fpu_accesses, - 3, - 3, - XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; - rt_power = rt_power + fp_u->rt_power; - } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + - scheu->rt_power; - } -} - -void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) - return; - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - // cout << indent_str_next << "Results Broadcast Bus Area = " << - // bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) { - cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << rfu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? rfu->power.readOp.longer_channel_leakage - : rfu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
rfu->power.readOp.power_gated_with_long_channel_leakage - : rfu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - if (plevel > 3) { - rfu->displayEnergy(indent + 4, is_tdp); - } - cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << scheu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? scheu->power.readOp.longer_channel_leakage - : scheu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? scheu->power.readOp.power_gated_with_long_channel_leakage - : scheu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << scheu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 3) { - scheu->displayEnergy(indent + 4, is_tdp); - } - exeu->displayEnergy(indent, is_tdp); - if (coredynp.num_fpus > 0) { - fp_u->displayEnergy(indent, is_tdp); - } - if (coredynp.num_muls > 0) { - mul->displayEnergy(indent, is_tdp); - } - cout << indent_str << "Results Broadcast Bus:" << endl; - cout << indent_str_next - << "Area Overhead = " << bypass.area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next - << "Peak Dynamic = " << bypass.power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? 
bypass.power.readOp.longer_channel_leakage - : bypass.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? bypass.power.readOp.power_gated_with_long_channel_leakage - : bypass.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << bypass.rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - } else { - cout << indent_str_next << "Register Files Peak Dynamic = " - << rfu->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Register Files Subthreshold Leakage = " - << rfu->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Register Files Gate Leakage = " - << rfu->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " - << scheu->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " - << scheu->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Instruction Sheduler Gate Leakage = " - << scheu->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " - << bypass.rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " - << bypass.rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " - << bypass.rt_power.readOp.gate_leakage << " W" << endl; - } -} - -void Core::computeEnergy(bool is_tdp) { - /* - * When computing TDP, power = energy_per_cycle (the value computed in this - * function) * clock_rate (in the display_energy function) When computing - * dyn_power; power = total energy (the value computed in this 
function) / - * Total execution time (cycle count / clock rate) - */ - // power_point_product_masks - double pppm_t[4] = {1, 1, 1, 1}; - double rtp_pipeline_coe; - double num_units = 4.0; - if (is_tdp) { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty == OOO) { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm( - pppm_t, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / - num_units); // User need to feed a duty cycle to improve accuracy - if (rnu->exist) { - rnu->power = rnu->power + corepipe->power * pppm_t; - power = power + rnu->power; - } - } - - if (ifu->exist) { - set_pppm(pppm_t, - coredynp.num_pipelines / num_units * coredynp.IFU_duty_cycle, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - // cout << "IFU = " << - // ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power * pppm_t; - // cout << "IFU = " << - // ifu->power.readOp.dynamic*clockRate << " W" << endl; - // cout << "1/4 pipe = " << - // corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; - // cout << "core = " << - // power.readOp.dynamic*clockRate << " W" << endl; - } - if (lsu->exist) { - set_pppm(pppm_t, - coredynp.num_pipelines / num_units * coredynp.LSU_duty_cycle, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - lsu->power = lsu->power + corepipe->power * pppm_t; - // cout << "LSU = " << - // lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; - // cout << "core = " << - // power.readOp.dynamic*clockRate << " W" << endl; - } - if (exu->exist) { - set_pppm(pppm_t, - coredynp.num_pipelines / num_units * coredynp.ALU_duty_cycle, - 
coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - exu->power = exu->power + corepipe->power * pppm_t; - // cout << "EXE = " << - // exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; - // cout << "core = " << - // power.readOp.dynamic*clockRate << " W" << endl; - } - if (mmu->exist) { - set_pppm(pppm_t, - coredynp.num_pipelines / num_units * - (0.5 + 0.5 * coredynp.LSU_duty_cycle), - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - mmu->power = mmu->power + corepipe->power * pppm_t; - // cout << "MMU = " << - // mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; - // cout << "core = " << - // power.readOp.dynamic*clockRate << " W" << endl; - } - - power = power + undiffCore->power; - - if (XML->sys.Private_L2) { - - l2cache->computeEnergy(is_tdp); - set_pppm(pppm_t, l2cache->cachep.clockRate / clockRate, 1, 1, 1); - // l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power * pppm_t; - } - - } else { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty == OOO) { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - if (XML->sys.homogeneous_cores == 1) { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - XML->sys.total_cycles * XML->sys.number_of_cores; - } else { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, - coredynp.num_pipelines * rtp_pipeline_coe / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - if (rnu->exist) { - rnu->rt_power = rnu->rt_power + corepipe->power * pppm_t; - - rt_power = rt_power + rnu->rt_power; - } - } else { - num_units = 4.0; - } - - if (ifu->exist) { - if (XML->sys.homogeneous_cores == 1) { - rtp_pipeline_coe = 
coredynp.pipeline_duty_cycle * - coredynp.IFU_duty_cycle * XML->sys.total_cycles * - XML->sys.number_of_cores; - } else { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - coredynp.IFU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, - coredynp.num_pipelines * rtp_pipeline_coe / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - ifu->rt_power = ifu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + ifu->rt_power; - } - if (lsu->exist) { - if (XML->sys.homogeneous_cores == 1) { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - coredynp.LSU_duty_cycle * XML->sys.total_cycles * - XML->sys.number_of_cores; - } else { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - coredynp.LSU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, - coredynp.num_pipelines * rtp_pipeline_coe / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - - lsu->rt_power = lsu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + lsu->rt_power; - } - if (exu->exist) { - if (XML->sys.homogeneous_cores == 1) { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - coredynp.ALU_duty_cycle * XML->sys.total_cycles * - XML->sys.number_of_cores; - } else { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - coredynp.ALU_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, - coredynp.num_pipelines * rtp_pipeline_coe / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - exu->rt_power = exu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + exu->rt_power; - } - if (mmu->exist) { - if (XML->sys.homogeneous_cores == 1) { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * - (0.5 + 0.5 * coredynp.LSU_duty_cycle) * - XML->sys.total_cycles * XML->sys.number_of_cores; - } else { - rtp_pipeline_coe = 
coredynp.pipeline_duty_cycle * - (0.5 + 0.5 * coredynp.LSU_duty_cycle) * - coredynp.total_cycles; - } - set_pppm(pppm_t, - coredynp.num_pipelines * rtp_pipeline_coe / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units, - coredynp.num_pipelines / num_units); - mmu->rt_power = mmu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + mmu->rt_power; - } - - rt_power = rt_power + undiffCore->power; - // cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" - //<< endl; - if (XML->sys.Private_L2) { - - l2cache->computeEnergy(is_tdp); - // set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - // l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; - } - } -} - -void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - string indent_str(indent, ' '); - string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; - - if (is_tdp) { - cout << "Core:" << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str - << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime - << " W" << endl; - cout << endl; - if (ifu->exist) { - cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << ifu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ifu->power.readOp.longer_channel_leakage - : ifu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? ifu->power.readOp.power_gated_with_long_channel_leakage - : ifu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << ifu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 2) { - ifu->displayEnergy(indent + 4, plevel, is_tdp); - } - } - if (coredynp.core_ty == OOO) { - if (rnu->exist) { - cout << indent_str << "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << rnu->power.readOp.dynamic * clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? rnu->power.readOp.longer_channel_leakage - : rnu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
rnu->power.readOp.power_gated_with_long_channel_leakage - : rnu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << rnu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 2) { - rnu->displayEnergy(indent + 4, plevel, is_tdp); - } - } - } - if (lsu->exist) { - cout << indent_str << "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << lsu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? lsu->power.readOp.longer_channel_leakage - : lsu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? lsu->power.readOp.power_gated_with_long_channel_leakage - : lsu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " - << lsu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 2) { - lsu->displayEnergy(indent + 4, plevel, is_tdp); - } - } - if (mmu->exist) { - cout << indent_str << "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << mmu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? mmu->power.readOp.longer_channel_leakage - : mmu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
mmu->power.readOp.power_gated_with_long_channel_leakage - : mmu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << mmu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 2) { - mmu->displayEnergy(indent + 4, plevel, is_tdp); - } - } - if (exu->exist) { - cout << indent_str << "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next - << "Peak Dynamic = " << exu->power.readOp.dynamic * clockRate << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? exu->power.readOp.longer_channel_leakage - : exu->power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? exu->power.readOp.power_gated_with_long_channel_leakage - : exu->power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " - << exu->rt_power.readOp.dynamic / executionTime << " W" << endl; - cout << endl; - if (plevel > 2) { - exu->displayEnergy(indent + 4, plevel, is_tdp); - } - } - // if (plevel >2) - // { - // if (undiffCore->exist) - // { - // cout << indent_str << "Undifferentiated Core" << - // endl; cout << indent_str_next << "Area = " << - // undiffCore->area.get_area()*1e-6<< " mm^2" << endl; cout - // << indent_str_next << "Peak Dynamic = " << - // undiffCore->power.readOp.dynamic*clockRate << " W" << endl; - //// cout << indent_str_next << "Subthreshold Leakage = " - ///<< undiffCore->power.readOp.leakage <<" W" << endl; - // cout << indent_str_next << "Subthreshold Leakage - //= - //" - // << - //(long_channel? 
- // undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) - //<< " W" << endl; cout << indent_str_next << "Gate Leakage = " - //<< undiffCore->power.readOp.gate_leakage << " W" << endl; - // // cout << indent_str_next << "Runtime Dynamic = " - //<< undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; - // cout - //<sys.Private_L2) { - - l2cache->displayEnergy(4, is_tdp); - } - - } else { - // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = - //" - //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " - // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << - // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << - // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next - //<< "Load Store Unit Peak Dynamic = " << - // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << - // lsu->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "Load Store Unit Gate Leakage = " << - // lsu->rt_power.readOp.gate_leakage - //<< " W" << endl; cout << indent_str_next << "Memory Management Unit - // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << - // endl; cout << indent_str_next << "Memory Management Unit Subthreshold - // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout - // << indent_str_next << "Memory Management Unit Gate Leakage = " << - // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next << "Execution Unit Peak Dynamic = " << - // exu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Execution Unit Subthreshold Leakage = " << - // exu->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "Execution Unit Gate Leakage = " << - // 
exu->rt_power.readOp.gate_leakage - //<< " W" << endl; - } -} -InstFetchU ::~InstFetchU() { - - if (!exist) - return; - if (IB) { - delete IB; - IB = 0; - } - if (ID_inst) { - delete ID_inst; - ID_inst = 0; - } - if (ID_operand) { - delete ID_operand; - ID_operand = 0; - } - if (ID_misc) { - delete ID_misc; - ID_misc = 0; - } - if (coredynp.predictionW > 0) { - if (BTB) { - delete BTB; - BTB = 0; - } - if (BPT) { - delete BPT; - BPT = 0; - } - } -} - -BranchPredictor ::~BranchPredictor() { - - if (!exist) - return; - if (globalBPT) { - delete globalBPT; - globalBPT = 0; - } - if (localBPT) { - delete localBPT; - localBPT = 0; - } - if (L1_localBPT) { - delete L1_localBPT; - L1_localBPT = 0; - } - if (L2_localBPT) { - delete L2_localBPT; - L2_localBPT = 0; - } - if (chooser) { - delete chooser; - chooser = 0; - } - if (RAS) { - delete RAS; - RAS = 0; - } -} - -RENAMINGU ::~RENAMINGU() { - - if (!exist) - return; - if (iFRAT) { - delete iFRAT; - iFRAT = 0; - } - if (iRRAT) { - delete iRRAT; - iRRAT = 0; - } - if (iFRAT) { - delete iFRAT; - iFRAT = 0; - } - if (ifreeL) { - delete ifreeL; - ifreeL = 0; - } - if (idcl) { - delete idcl; - idcl = 0; - } - if (fFRAT) { - delete fFRAT; - fFRAT = 0; - } - if (fRRAT) { - delete fRRAT; - fRRAT = 0; - } - if (fdcl) { - delete fdcl; - fdcl = 0; - } - if (ffreeL) { - delete ffreeL; - ffreeL = 0; - } - if (RAHT) { - delete RAHT; - RAHT = 0; - } -} - -LoadStoreU ::~LoadStoreU() { - - if (!exist) - return; - if (LSQ) { - delete LSQ; - LSQ = 0; - } - if (LoadQ) { - delete LoadQ; - LoadQ = 0; - } -} - -MemManU ::~MemManU() { - - if (!exist) - return; - if (itlb) { - delete itlb; - itlb = 0; - } - if (dtlb) { - delete dtlb; - dtlb = 0; - } -} - -RegFU ::~RegFU() { - - if (!exist) - return; - if (IRF) { - delete IRF; - IRF = 0; - } - if (FRF) { - delete FRF; - FRF = 0; - } - if (RFWIN) { - delete RFWIN; - RFWIN = 0; - } -} - -SchedulerU ::~SchedulerU() { - - if (!exist) - return; - if (int_inst_window) { - delete int_inst_window; - 
int_inst_window = 0; - } - if (fp_inst_window) { - delete fp_inst_window; - fp_inst_window = 0; - } - if (ROB) { - delete ROB; - ROB = 0; - } - if (instruction_selection) { - delete instruction_selection; - instruction_selection = 0; - } -} - -EXECU ::~EXECU() { - - if (!exist) - return; - if (int_bypass) { - delete int_bypass; - int_bypass = 0; - } - if (intTagBypass) { - delete intTagBypass; - intTagBypass = 0; - } - if (int_mul_bypass) { - delete int_mul_bypass; - int_mul_bypass = 0; - } - if (intTag_mul_Bypass) { - delete intTag_mul_Bypass; - intTag_mul_Bypass = 0; - } - if (fp_bypass) { - delete fp_bypass; - fp_bypass = 0; - } - if (fpTagBypass) { - delete fpTagBypass; - fpTagBypass = 0; - } - if (fp_u) { - delete fp_u; - fp_u = 0; - } - if (exeu) { - delete exeu; - exeu = 0; - } - if (mul) { - delete mul; - mul = 0; - } - if (rfu) { - delete rfu; - rfu = 0; - } - if (scheu) { - delete scheu; - scheu = 0; - } -} - -Core ::~Core() { - - if (ifu) { - delete ifu; - ifu = 0; - } - if (lsu) { - delete lsu; - lsu = 0; - } - if (rnu) { - delete rnu; - rnu = 0; - } - if (mmu) { - delete mmu; - mmu = 0; - } - if (exu) { - delete exu; - exu = 0; - } - if (corepipe) { - delete corepipe; - corepipe = 0; - } - if (undiffCore) { - delete undiffCore; - undiffCore = 0; - } - if (l2cache) { - delete l2cache; - l2cache = 0; - } -} - -void Core::set_core_param() { - coredynp.opt_local = XML->sys.core[ithCore].opt_local; - coredynp.x86 = XML->sys.core[ithCore].x86; - coredynp.Embedded = XML->sys.Embedded; - coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; - coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; - coredynp.fetchW = XML->sys.core[ithCore].fetch_width; - coredynp.decodeW = XML->sys.core[ithCore].decode_width; - coredynp.issueW = XML->sys.core[ithCore].issue_width; - coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; - coredynp.commitW = XML->sys.core[ithCore].commit_width; - coredynp.peak_commitW = 
XML->sys.core[ithCore].peak_issue_width; - coredynp.predictionW = XML->sys.core[ithCore].prediction_width; - coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; - coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; - coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; - coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; - coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; - coredynp.vdd = XML->sys.core[ithCore].vdd; - coredynp.power_gating_vcc = XML->sys.core[ithCore].power_gating_vcc; - - coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; - coredynp.multithreaded = coredynp.num_hthreads > 1 ? true : false; - coredynp.hthread_width = - int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))); - coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; - coredynp.pc_width = XML->sys.virtual_address_width; - - coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; - coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; - coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; - coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; - coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; - coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; - coredynp.int_data_width = int(ceil(XML->sys.machine_bits / 32.0)) * 32; - coredynp.fp_data_width = coredynp.int_data_width; - coredynp.v_address_width = XML->sys.virtual_address_width; - coredynp.p_address_width = XML->sys.physical_address_width; - - coredynp.scheu_ty = - (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; - coredynp.arch_ireg_width = - int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); - coredynp.arch_freg_width = - int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); - coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; - coredynp.num_FRF_entry = 
XML->sys.core[ithCore].archi_Regs_FRF_size; - coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; - coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; - coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; - coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; - - // Max power duty cycle for peak power estimation - // if (coredynp.core_ty==OOO) - // { - // coredynp.IFU_duty_cycle = 1; - // coredynp.LSU_duty_cycle = 1; - // coredynp.MemManU_I_duty_cycle =1; - // coredynp.MemManU_D_duty_cycle =1; - // coredynp.ALU_duty_cycle =1; - // coredynp.MUL_duty_cycle =1; - // coredynp.FPU_duty_cycle =1; - // coredynp.ALU_cdb_duty_cycle =1; - // coredynp.MUL_cdb_duty_cycle =1; - // coredynp.FPU_cdb_duty_cycle =1; - // } - // else - // { - coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; - coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; - coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; - coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; - coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; - coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; - coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; - coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; - coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; - coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; - coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; - // } - - if (!((coredynp.core_ty == OOO) || (coredynp.core_ty == Inorder))) { - cout << "Invalid Core Type" << endl; - exit(0); - } - // if (coredynp.core_ty==OOO) - // { - // cout<<"OOO processor models are being updated and will be - // available in next release"<sys.core[ithCore].phy_Regs_IRF_size))); - coredynp.phy_freg_width = - int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); - coredynp.num_ifreelist_entries = 
coredynp.num_IRF_entry = - XML->sys.core[ithCore].phy_Regs_IRF_size; - coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = - XML->sys.core[ithCore].phy_Regs_FRF_size; - } else if (coredynp.scheu_ty == - ReservationStation) { // ROB serves as Phy RF in RS based OOO - coredynp.phy_ireg_width = - int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.phy_freg_width = - int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; - coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; - } - } - - int GC_count = - XML->sys.core[ithCore] - .checkpoint_depth; // best check pointing entries for a 4~8 issue OOO - // should be 8~48;See TR for reference. - if (coredynp.rm_ty == RAMbased) { - coredynp.globalCheckpoint = - GC_count > 4 ? 4 : GC_count; // RAM-based RAT cannot have more than 4 - // GCs; see "a power-aware hybrid ram-cam - // renaming mechanism for fast recovery" - } else if (coredynp.rm_ty == CAMbased) { - coredynp.globalCheckpoint = GC_count < 1 ? 1 : GC_count; - } - - coredynp.perThreadState = 8; - coredynp.instruction_length = 32; - coredynp.clockRate = XML->sys.core[ithCore].clock_rate; - coredynp.clockRate *= 1e6; - coredynp.regWindowing = (XML->sys.core[ithCore].register_windows_size > 0 && - coredynp.core_ty == Inorder) - ? true - : false; - coredynp.executionTime = XML->sys.total_cycles / coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, - 0, - coredynp.num_hthreads, - coredynp.num_hthreads, - 0); - - // does not care device types, since all core device types are set at sys. 
- // level - if (coredynp.vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = coredynp.vdd; - interface_ip.lop_Vdd = coredynp.vdd; - interface_ip.lstp_Vdd = coredynp.vdd; - } - - if (coredynp.power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = coredynp.power_gating_vcc; - } -} diff --git a/src/core.h b/src/core.h deleted file mode 100644 index 12e99b0..0000000 --- a/src/core.h +++ /dev/null @@ -1,282 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#ifndef CORE_H_ -#define CORE_H_ - -#include "XML_Parse.h" -#include "array.h" -#include "basic_components.h" -#include "interconnect.h" -#include "logic.h" -#include "parameter.h" -#include "sharedcache.h" - -class BranchPredictor : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST *globalBPT; - ArrayST *localBPT; - ArrayST *L1_localBPT; - ArrayST *L2_localBPT; - ArrayST *chooser; - ArrayST *RAS; - bool exist; - - BranchPredictor(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exsit = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~BranchPredictor(); -}; - -class InstFetchU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Cache_policy cache_p; - InstCache icache; - ArrayST *IB; - ArrayST *BTB; - BranchPredictor *BPT; - inst_decoder *ID_inst; - inst_decoder *ID_operand; - inst_decoder *ID_misc; - bool exist; - - InstFetchU(ParseXML *XML_interface, - int ithCore_, - 
InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exsit = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~InstFetchU(); -}; - -class SchedulerU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double Iw_height, fp_Iw_height, ROB_height; - ArrayST *int_inst_window; - ArrayST *fp_inst_window; - ArrayST *ROB; - selection_logic *instruction_selection; - bool exist; - - SchedulerU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~SchedulerU(); -}; - -class RENAMINGU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate, executionTime; - CoreDynParam coredynp; - ArrayST *iFRAT; - ArrayST *fFRAT; - ArrayST *iRRAT; - ArrayST *fRRAT; - ArrayST *ifreeL; - ArrayST *ffreeL; - dep_resource_conflict_check *idcl; - dep_resource_conflict_check *fdcl; - ArrayST *RAHT; // register alias history table Used to store GC - bool exist; - - RENAMINGU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~RENAMINGU(); -}; - -class LoadStoreU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - enum Cache_policy cache_p; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - DataCache dcache; - ArrayST *LSQ; // it is actually the store queue but for 
inorder processors it - // serves as both loadQ and StoreQ - ArrayST *LoadQ; - bool exist; - - LoadStoreU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~LoadStoreU(); -}; - -class MemManU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST *itlb; - ArrayST *dtlb; - bool exist; - - MemManU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~MemManU(); -}; - -class RegFU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double int_regfile_height, fp_regfile_height; - ArrayST *IRF; - ArrayST *FRF; - ArrayST *RFWIN; - bool exist; - - RegFU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~RegFU(); -}; - -class EXECU : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - CoreDynParam coredynp; - RegFU *rfu; - SchedulerU *scheu; - FunctionalUnit *fp_u; - FunctionalUnit *exeu; - FunctionalUnit *mul; - interconnect *int_bypass; - interconnect *intTagBypass; - interconnect *int_mul_bypass; - 
interconnect *intTag_mul_Bypass; - interconnect *fp_bypass; - interconnect *fpTagBypass; - - Component bypass; - bool exist; - - EXECU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - double lsq_height_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~EXECU(); -}; - -class Core : public Component { -public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - InstFetchU *ifu; - LoadStoreU *lsu; - MemManU *mmu; - EXECU *exu; - RENAMINGU *rnu; - Pipeline *corepipe; - UndiffCore *undiffCore; - SharedCache *l2cache; - CoreDynParam coredynp; - // full_decoder inst_decoder; - // clock_network clockNetwork; - Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_); - void set_core_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~Core(); -}; - -#endif /* CORE_H_ */ diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 0000000..efe593e --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,22 @@ +add_library(core + branch_predictor.h + branch_predictor.cc + core.h + core.cc + exec_unit.h + exec_unit.cc + instfetch.h + instfetch.cc + loadstore.h + loadstore.cc + mmu.h + mmu.cc + regfile.h + regfile.cc + renaming_unit.h + renaming_unit.cc + scheduler.h + scheduler.cc +) +target_include_directories(core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(core LINK_PUBLIC cacti cache top) diff --git a/src/core/branch_predictor.cc b/src/core/branch_predictor.cc new file mode 100644 index 0000000..c17fd42 --- /dev/null +++ b/src/core/branch_predictor.cc @@ -0,0 +1,520 @@ +/***************************************************************************** + * McPAT + * SOFTWARE 
LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "branch_predictor.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +BranchPredictor::BranchPredictor(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), globalBPT(0), localBPT(0), L1_localBPT(0), + L2_localBPT(0), chooser(0), RAS(0), exist(exist_) { + /* + * Branch Predictor, accessed during ID stage. + * McPAT's branch predictor model is the tournament branch predictor used in + * Alpha 21264, including global predictor, local two level predictor, and + * Chooser. The Branch predictor also includes a RAS (return address stack) + * for function calls Branch predictors are tagged by thread ID and modeled as + * 1-way associative cache. However RAS return address stacks are duplicated + * for each thread. 
+ * TODO:Data Width need to be computed more precisely * + */ + if (!exist) + return; + int tag, data; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + interface_ip.assoc = 1; + interface_ip.pure_cam = false; + if (coredynp.multithreaded) { + + tag = int(log2(coredynp.num_hthreads) + EXTRA_TAG_BITS); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + } else { + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + } + // Global predictor + data = + int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.global_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + globalBPT = new ArrayST(&interface_ip, + "Global Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + globalBPT->area.set_area(globalBPT->area.get_area() + + globalBPT->local_result.area); + area.set_area(area.get_area() + globalBPT->local_result.area); + + // Local BPT (Level 1) + data = + int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0] / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.local_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / 
clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + L1_localBPT = new ArrayST(&interface_ip, + "L1 local Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + L1_localBPT->area.set_area(L1_localBPT->area.get_area() + + L1_localBPT->local_result.area); + area.set_area(area.get_area() + L1_localBPT->local_result.area); + + // Local BPT (Level 2) + data = + int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1] / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.local_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + L2_localBPT = new ArrayST(&interface_ip, + "L2 local Predictor", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + L2_localBPT->area.set_area(L2_localBPT->area.get_area() + + L2_localBPT->local_result.area); + area.set_area(area.get_area() + L2_localBPT->local_result.area); + + // Chooser + data = + int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].predictor.chooser_predictor_entries; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + 
interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + chooser = new ArrayST(&interface_ip, + "Predictor Chooser", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + chooser->area.set_area(chooser->area.get_area() + chooser->local_result.area); + area.set_area(area.get_area() + chooser->local_result.area); + + // RAS return address stacks are Duplicated for each thread. + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + data = int(ceil(coredynp.pc_width / 8.0)); + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].RAS_size; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + RAS = new ArrayST( + &interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); + RAS->area.set_area(RAS->area.get_area() + + RAS->local_result.area * coredynp.num_hthreads); + area.set_area(area.get_area() + + RAS->local_result.area * coredynp.num_hthreads); +} + +void BranchPredictor::computeEnergy(bool is_tdp) { + if (!exist) + return; + double r_access; + double w_access; + if (is_tdp) { + r_access = coredynp.predictionW * 
coredynp.BR_duty_cycle; + w_access = 0 * coredynp.BR_duty_cycle; + globalBPT->stats_t.readAc.access = r_access; + globalBPT->stats_t.writeAc.access = w_access; + globalBPT->tdp_stats = globalBPT->stats_t; + + L1_localBPT->stats_t.readAc.access = r_access; + L1_localBPT->stats_t.writeAc.access = w_access; + L1_localBPT->tdp_stats = L1_localBPT->stats_t; + + L2_localBPT->stats_t.readAc.access = r_access; + L2_localBPT->stats_t.writeAc.access = w_access; + L2_localBPT->tdp_stats = L2_localBPT->stats_t; + + chooser->stats_t.readAc.access = r_access; + chooser->stats_t.writeAc.access = w_access; + chooser->tdp_stats = chooser->stats_t; + + RAS->stats_t.readAc.access = r_access; + RAS->stats_t.writeAc.access = w_access; + RAS->tdp_stats = RAS->stats_t; + } else { + // The resolution of BPT accesses is coarse, but this is + // because most simulators cannot track finer grained details + r_access = XML->sys.core[ithCore].branch_instructions; + w_access = + XML->sys.core[ithCore].branch_mispredictions + + 0.1 * XML->sys.core[ithCore] + .branch_instructions; // 10% of BR will flip internal bits//0 + globalBPT->stats_t.readAc.access = r_access; + globalBPT->stats_t.writeAc.access = w_access; + globalBPT->rtp_stats = globalBPT->stats_t; + + L1_localBPT->stats_t.readAc.access = r_access; + L1_localBPT->stats_t.writeAc.access = w_access; + L1_localBPT->rtp_stats = L1_localBPT->stats_t; + + L2_localBPT->stats_t.readAc.access = r_access; + L2_localBPT->stats_t.writeAc.access = w_access; + L2_localBPT->rtp_stats = L2_localBPT->stats_t; + + chooser->stats_t.readAc.access = r_access; + chooser->stats_t.writeAc.access = w_access; + chooser->rtp_stats = chooser->stats_t; + + RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; + RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; + RAS->rtp_stats = RAS->stats_t; + } + + globalBPT->power_t.reset(); + L1_localBPT->power_t.reset(); + L2_localBPT->power_t.reset(); + chooser->power_t.reset(); + 
RAS->power_t.reset(); + + globalBPT->power_t.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->stats_t.readAc.access + + globalBPT->stats_t.writeAc.access * + globalBPT->local_result.power.writeOp.dynamic; + L1_localBPT->power_t.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->stats_t.readAc.access + + L1_localBPT->stats_t.writeAc.access * + L1_localBPT->local_result.power.writeOp.dynamic; + + L2_localBPT->power_t.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->stats_t.readAc.access + + L2_localBPT->stats_t.writeAc.access * + L2_localBPT->local_result.power.writeOp.dynamic; + + chooser->power_t.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->stats_t.readAc.access + + chooser->stats_t.writeAc.access * + chooser->local_result.power.writeOp.dynamic; + RAS->power_t.readOp.dynamic += + RAS->local_result.power.readOp.dynamic * RAS->stats_t.readAc.access + + RAS->stats_t.writeAc.access * RAS->local_result.power.writeOp.dynamic; + + if (is_tdp) { + globalBPT->power = + globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; + L1_localBPT->power = + L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; + L2_localBPT->power = + L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; + chooser->power = chooser->power_t + chooser->local_result.power * pppm_lkg; + RAS->power = + RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; + + power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + + chooser->power + RAS->power; + } else { + globalBPT->rt_power = + globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; + L1_localBPT->rt_power = + L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; + L2_localBPT->rt_power = + L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; + chooser->rt_power = + chooser->power_t + chooser->local_result.power * 
pppm_lkg; + RAS->rt_power = + RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; + rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + + L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power; + } +} + +void BranchPredictor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + if (is_tdp) { + cout << indent_str << "Global Predictor:" << endl; + cout << indent_str_next << "Area = " << globalBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << globalBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? globalBPT->power.readOp.longer_channel_leakage + : globalBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? globalBPT->power.readOp + .power_gated_with_long_channel_leakage + : globalBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << globalBPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Local Predictor:" << endl; + cout << indent_str << "L1_Local Predictor:" << endl; + cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
L1_localBPT->power.readOp.longer_channel_leakage + : L1_localBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? L1_localBPT->power.readOp + .power_gated_with_long_channel_leakage + : L1_localBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << L1_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "L2_Local Predictor:" << endl; + cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? L2_localBPT->power.readOp.longer_channel_leakage + : L2_localBPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? L2_localBPT->power.readOp + .power_gated_with_long_channel_leakage + : L2_localBPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << L2_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + + cout << indent_str << "Chooser:" << endl; + cout << indent_str_next << "Area = " << chooser->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << chooser->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
chooser->power.readOp.longer_channel_leakage + : chooser->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? chooser->power.readOp.power_gated_with_long_channel_leakage + : chooser->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << chooser->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "RAS:" << endl; + cout << indent_str_next << "Area = " << RAS->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << RAS->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? RAS->power.readOp.longer_channel_leakage + : RAS->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
RAS->power.readOp.power_gated_with_long_channel_leakage + : RAS->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + } else { + // cout << indent_str_next << "Global Predictor Peak Dynamic = " + //<< globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "Global Predictor Subthreshold Leakage = " + // << globalBPT->rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next + //<< "Global Predictor Gate Leakage = " << + // globalBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // << indent_str_next << "Local Predictor Peak Dynamic = " << + // L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Local Predictor Subthreshold Leakage = " << + // L1_localBPT->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next << "Local Predictor Gate Leakage = " << + // L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // << indent_str_next << "Chooser Peak Dynamic = " << + // chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Chooser Subthreshold Leakage = " << + // chooser->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + //<< "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << + //" W" << endl; cout << indent_str_next << "RAS Peak Dynamic = " + //<< RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "RAS Subthreshold Leakage = " << + // RAS->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" + //<< endl; + } +} + +BranchPredictor ::~BranchPredictor() { + + if (!exist) + return; + if (globalBPT) { + delete 
globalBPT; + globalBPT = 0; + } + if (localBPT) { + delete localBPT; + localBPT = 0; + } + if (L1_localBPT) { + delete L1_localBPT; + L1_localBPT = 0; + } + if (L2_localBPT) { + delete L2_localBPT; + L2_localBPT = 0; + } + if (chooser) { + delete chooser; + chooser = 0; + } + if (RAS) { + delete RAS; + RAS = 0; + } +} diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h new file mode 100644 index 0000000..8fb5d57 --- /dev/null +++ b/src/core/branch_predictor.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __BRANCH_PREDICTOR_H__ +#define __BRANCH_PREDICTOR_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class BranchPredictor : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + ArrayST *globalBPT; + ArrayST *localBPT; + ArrayST *L1_localBPT; + ArrayST *L2_localBPT; + ArrayST *chooser; + ArrayST *RAS; + bool exist; + + BranchPredictor(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exsit = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~BranchPredictor(); +}; + +#endif // __BRANCH_PREDICTOR__ diff --git a/src/core/core.cc b/src/core/core.cc new file mode 100644 index 0000000..3f48db2 --- /dev/null +++ b/src/core/core.cc @@ -0,0 +1,768 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "core.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include +//#include "globalvar.h" + +Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + ifu(0), 
lsu(0), mmu(0), exu(0), rnu(0), corepipe(0), undiffCore(0), + l2cache(0) { + /* + * initialize, compute and optimize individual components. + */ + + bool exit_flag = true; + + double pipeline_area_per_unit; + // interface_ip.wire_is_mat_type = 2; + // interface_ip.wire_os_mat_type = 2; + // interface_ip.wt =Global_30; + set_core_param(); + + if (XML->sys.Private_L2) { + l2cache = new SharedCache(XML, ithCore, &interface_ip); + } + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); + lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); + mmu = new MemManU(XML, ithCore, &interface_ip, coredynp, exit_flag); + exu = new EXECU( + XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); + undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); + if (coredynp.core_ty == OOO) { + rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp); + } + corepipe = new Pipeline(&interface_ip, coredynp); + + if (coredynp.core_ty == OOO) { + pipeline_area_per_unit = + (corepipe->area.get_area() * coredynp.num_pipelines) / 5.0; + if (rnu->exist) { + rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); + } + } else { + pipeline_area_per_unit = + (corepipe->area.get_area() * coredynp.num_pipelines) / 4.0; + } + + // area.set_area(area.get_area()+ corepipe->area.get_area()); + if (ifu->exist) { + ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu->area.get_area()); + } + if (lsu->exist) { + lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu->area.get_area()); + } + if (exu->exist) { + exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu->area.get_area()); + } + if (mmu->exist) { + mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); + 
area.set_area(area.get_area() + mmu->area.get_area()); + } + + if (coredynp.core_ty == OOO) { + if (rnu->exist) { + + area.set_area(area.get_area() + rnu->area.get_area()); + } + } + + if (undiffCore->exist) { + area.set_area(area.get_area() + undiffCore->area.get_area()); + } + + if (XML->sys.Private_L2) { + area.set_area(area.get_area() + l2cache->area.get_area()); + } + // //clock power + // clockNetwork.init_wire_external(is_default, &interface_ip); + // clockNetwork.clk_area =area*1.1;//10% of placement overhead. + // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal + // clockNetwork.start_wiring_level =5;//toplevel metal + // clockNetwork.num_regs = corepipe.tot_stage_vector; + // clockNetwork.optimize_wire(); +} + +void Core::computeEnergy(bool is_tdp) { + /* + * When computing TDP, power = energy_per_cycle (the value computed in this + * function) * clock_rate (in the display_energy function) When computing + * dyn_power; power = total energy (the value computed in this function) / + * Total execution time (cycle count / clock rate) + */ + // power_point_product_masks + double pppm_t[4] = {1, 1, 1, 1}; + double rtp_pipeline_coe; + double num_units = 4.0; + if (is_tdp) { + ifu->computeEnergy(is_tdp); + lsu->computeEnergy(is_tdp); + mmu->computeEnergy(is_tdp); + exu->computeEnergy(is_tdp); + + if (coredynp.core_ty == OOO) { + num_units = 5.0; + rnu->computeEnergy(is_tdp); + set_pppm( + pppm_t, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / + num_units); // User need to feed a duty cycle to improve accuracy + if (rnu->exist) { + rnu->power = rnu->power + corepipe->power * pppm_t; + power = power + rnu->power; + } + } + + if (ifu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.IFU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + // cout << "IFU = 
" << + // ifu->power.readOp.dynamic*clockRate << " W" << endl; + ifu->power = ifu->power + corepipe->power * pppm_t; + // cout << "IFU = " << + // ifu->power.readOp.dynamic*clockRate << " W" << endl; + // cout << "1/4 pipe = " << + // corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; + power = power + ifu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (lsu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.LSU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + lsu->power = lsu->power + corepipe->power * pppm_t; + // cout << "LSU = " << + // lsu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + lsu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (exu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * coredynp.ALU_duty_cycle, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + exu->power = exu->power + corepipe->power * pppm_t; + // cout << "EXE = " << + // exu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + exu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + if (mmu->exist) { + set_pppm(pppm_t, + coredynp.num_pipelines / num_units * + (0.5 + 0.5 * coredynp.LSU_duty_cycle), + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + mmu->power = mmu->power + corepipe->power * pppm_t; + // cout << "MMU = " << + // mmu->power.readOp.dynamic*clockRate << " W" << endl; + power = power + mmu->power; + // cout << "core = " << + // power.readOp.dynamic*clockRate << " W" << endl; + } + + power = power + undiffCore->power; + + if (XML->sys.Private_L2) { + + l2cache->computeEnergy(is_tdp); + set_pppm(pppm_t, l2cache->cachep.clockRate / clockRate, 
1, 1, 1); + // l2cache->power = l2cache->power*pppm_t; + power = power + l2cache->power * pppm_t; + } + + } else { + ifu->computeEnergy(is_tdp); + lsu->computeEnergy(is_tdp); + mmu->computeEnergy(is_tdp); + exu->computeEnergy(is_tdp); + + if (coredynp.core_ty == OOO) { + num_units = 5.0; + rnu->computeEnergy(is_tdp); + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + XML->sys.total_cycles * XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + if (rnu->exist) { + rnu->rt_power = rnu->rt_power + corepipe->power * pppm_t; + + rt_power = rt_power + rnu->rt_power; + } + } else { + num_units = 4.0; + } + + if (ifu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.IFU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.IFU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + ifu->rt_power = ifu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + ifu->rt_power; + } + if (lsu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.LSU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.LSU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines 
/ num_units); + + lsu->rt_power = lsu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + lsu->rt_power; + } + if (exu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.ALU_duty_cycle * XML->sys.total_cycles * + XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + coredynp.ALU_duty_cycle * coredynp.total_cycles; + } + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + exu->rt_power = exu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + exu->rt_power; + } + if (mmu->exist) { + if (XML->sys.homogeneous_cores == 1) { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + (0.5 + 0.5 * coredynp.LSU_duty_cycle) * + XML->sys.total_cycles * XML->sys.number_of_cores; + } else { + rtp_pipeline_coe = coredynp.pipeline_duty_cycle * + (0.5 + 0.5 * coredynp.LSU_duty_cycle) * + coredynp.total_cycles; + } + set_pppm(pppm_t, + coredynp.num_pipelines * rtp_pipeline_coe / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units, + coredynp.num_pipelines / num_units); + mmu->rt_power = mmu->rt_power + corepipe->power * pppm_t; + rt_power = rt_power + mmu->rt_power; + } + + rt_power = rt_power + undiffCore->power; + // cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" + //<< endl; + if (XML->sys.Private_L2) { + + l2cache->computeEnergy(is_tdp); + // set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); + // l2cache->rt_power = l2cache->rt_power*pppm_t; + rt_power = rt_power + l2cache->rt_power; + } + } +} + +void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << 
"Core:" << endl; + cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel ? power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << endl; + cout << indent_str + << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (ifu->exist) { + cout << indent_str << "Instruction Fetch Unit:" << endl; + cout << indent_str_next << "Area = " << ifu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ifu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ifu->power.readOp.longer_channel_leakage + : ifu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
ifu->power.readOp.power_gated_with_long_channel_leakage + : ifu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ifu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + ifu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (coredynp.core_ty == OOO) { + if (rnu->exist) { + cout << indent_str << "Renaming Unit:" << endl; + cout << indent_str_next << "Area = " << rnu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << rnu->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? rnu->power.readOp.longer_channel_leakage + : rnu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? rnu->power.readOp.power_gated_with_long_channel_leakage + : rnu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << rnu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + rnu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + } + if (lsu->exist) { + cout << indent_str << "Load Store Unit:" << endl; + cout << indent_str_next << "Area = " << lsu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << lsu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
lsu->power.readOp.longer_channel_leakage + : lsu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? lsu->power.readOp.power_gated_with_long_channel_leakage + : lsu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << lsu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + lsu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (mmu->exist) { + cout << indent_str << "Memory Management Unit:" << endl; + cout << indent_str_next << "Area = " << mmu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << mmu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? mmu->power.readOp.longer_channel_leakage + : mmu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? mmu->power.readOp.power_gated_with_long_channel_leakage + : mmu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << mmu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + mmu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + if (exu->exist) { + cout << indent_str << "Execution Unit:" << endl; + cout << indent_str_next << "Area = " << exu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << exu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
exu->power.readOp.longer_channel_leakage + : exu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? exu->power.readOp.power_gated_with_long_channel_leakage + : exu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << exu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 2) { + exu->displayEnergy(indent + 4, plevel, is_tdp); + } + } + // if (plevel >2) + // { + // if (undiffCore->exist) + // { + // cout << indent_str << "Undifferentiated Core" << + // endl; cout << indent_str_next << "Area = " << + // undiffCore->area.get_area()*1e-6<< " mm^2" << endl; cout + // << indent_str_next << "Peak Dynamic = " << + // undiffCore->power.readOp.dynamic*clockRate << " W" << endl; + //// cout << indent_str_next << "Subthreshold Leakage = " + ///<< undiffCore->power.readOp.leakage <<" W" << endl; + // cout << indent_str_next << "Subthreshold Leakage + //= + //" + // << + //(long_channel? 
+ // undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) + //<< " W" << endl; cout << indent_str_next << "Gate Leakage = " + //<< undiffCore->power.readOp.gate_leakage << " W" << endl; + // // cout << indent_str_next << "Runtime Dynamic = " + //<< undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; + // cout + //<sys.Private_L2) { + + l2cache->displayEnergy(4, is_tdp); + } + + } else { + // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = + //" + //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " + // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << + // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next + //<< "Load Store Unit Peak Dynamic = " << + // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << + // lsu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Load Store Unit Gate Leakage = " << + // lsu->rt_power.readOp.gate_leakage + //<< " W" << endl; cout << indent_str_next << "Memory Management Unit + // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << + // endl; cout << indent_str_next << "Memory Management Unit Subthreshold + // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout + // << indent_str_next << "Memory Management Unit Gate Leakage = " << + // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Execution Unit Peak Dynamic = " << + // exu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Execution Unit Subthreshold Leakage = " << + // exu->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Execution Unit Gate Leakage = " << + // 
exu->rt_power.readOp.gate_leakage + //<< " W" << endl; + } +} + +Core ::~Core() { + + if (ifu) { + delete ifu; + ifu = 0; + } + if (lsu) { + delete lsu; + lsu = 0; + } + if (rnu) { + delete rnu; + rnu = 0; + } + if (mmu) { + delete mmu; + mmu = 0; + } + if (exu) { + delete exu; + exu = 0; + } + if (corepipe) { + delete corepipe; + corepipe = 0; + } + if (undiffCore) { + delete undiffCore; + undiffCore = 0; + } + if (l2cache) { + delete l2cache; + l2cache = 0; + } +} + +void Core::set_core_param() { + coredynp.opt_local = XML->sys.core[ithCore].opt_local; + coredynp.x86 = XML->sys.core[ithCore].x86; + coredynp.Embedded = XML->sys.Embedded; + coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; + coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; + coredynp.fetchW = XML->sys.core[ithCore].fetch_width; + coredynp.decodeW = XML->sys.core[ithCore].decode_width; + coredynp.issueW = XML->sys.core[ithCore].issue_width; + coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; + coredynp.commitW = XML->sys.core[ithCore].commit_width; + coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; + coredynp.predictionW = XML->sys.core[ithCore].prediction_width; + coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; + coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; + coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; + coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; + coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; + coredynp.vdd = XML->sys.core[ithCore].vdd; + coredynp.power_gating_vcc = XML->sys.core[ithCore].power_gating_vcc; + + coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; + coredynp.multithreaded = coredynp.num_hthreads > 1 ? 
true : false; + coredynp.hthread_width = + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))); + coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; + coredynp.pc_width = XML->sys.virtual_address_width; + + coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; + coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; + coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; + coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; + coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; + coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; + coredynp.int_data_width = int(ceil(XML->sys.machine_bits / 32.0)) * 32; + coredynp.fp_data_width = coredynp.int_data_width; + coredynp.v_address_width = XML->sys.virtual_address_width; + coredynp.p_address_width = XML->sys.physical_address_width; + + coredynp.scheu_ty = + (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; + coredynp.arch_ireg_width = + int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); + coredynp.arch_freg_width = + int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); + coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; + coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; + coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; + coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; + coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; + coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; + + // Max power duty cycle for peak power estimation + // if (coredynp.core_ty==OOO) + // { + // coredynp.IFU_duty_cycle = 1; + // coredynp.LSU_duty_cycle = 1; + // coredynp.MemManU_I_duty_cycle =1; + // coredynp.MemManU_D_duty_cycle =1; + // coredynp.ALU_duty_cycle =1; + // coredynp.MUL_duty_cycle =1; + // coredynp.FPU_duty_cycle =1; + // coredynp.ALU_cdb_duty_cycle =1; + // 
coredynp.MUL_cdb_duty_cycle =1; + // coredynp.FPU_cdb_duty_cycle =1; + // } + // else + // { + coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; + coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; + coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; + coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; + coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; + coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; + coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; + coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; + coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; + coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; + coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; + // } + + if (!((coredynp.core_ty == OOO) || (coredynp.core_ty == Inorder))) { + cout << "Invalid Core Type" << endl; + exit(0); + } + // if (coredynp.core_ty==OOO) + // { + // cout<<"OOO processor models are being updated and will be + // available in next release"<sys.core[ithCore].phy_Regs_IRF_size))); + coredynp.phy_freg_width = + int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); + coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = + XML->sys.core[ithCore].phy_Regs_IRF_size; + coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = + XML->sys.core[ithCore].phy_Regs_FRF_size; + } else if (coredynp.scheu_ty == + ReservationStation) { // ROB serves as Phy RF in RS based OOO + coredynp.phy_ireg_width = + int(ceil(log2(XML->sys.core[ithCore].ROB_size))); + coredynp.phy_freg_width = + int(ceil(log2(XML->sys.core[ithCore].ROB_size))); + coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; + coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; + } + } + + int GC_count = + XML->sys.core[ithCore] + .checkpoint_depth; // best check pointing entries for a 4~8 issue OOO 
+ // should be 8~48;See TR for reference. + if (coredynp.rm_ty == RAMbased) { + coredynp.globalCheckpoint = + GC_count > 4 ? 4 : GC_count; // RAM-based RAT cannot have more than 4 + // GCs; see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + } else if (coredynp.rm_ty == CAMbased) { + coredynp.globalCheckpoint = GC_count < 1 ? 1 : GC_count; + } + + coredynp.perThreadState = 8; + coredynp.instruction_length = 32; + coredynp.clockRate = XML->sys.core[ithCore].clock_rate; + coredynp.clockRate *= 1e6; + coredynp.regWindowing = (XML->sys.core[ithCore].register_windows_size > 0 && + coredynp.core_ty == Inorder) + ? true + : false; + coredynp.executionTime = XML->sys.total_cycles / coredynp.clockRate; + set_pppm(coredynp.pppm_lkg_multhread, + 0, + coredynp.num_hthreads, + coredynp.num_hthreads, + 0); + + // does not care device types, since all core device types are set at sys. + // level + if (coredynp.vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = coredynp.vdd; + interface_ip.lop_Vdd = coredynp.vdd; + interface_ip.lstp_Vdd = coredynp.vdd; + } + + if (coredynp.power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = coredynp.power_gating_vcc; + } +} diff --git a/src/core/core.h b/src/core/core.h new file mode 100644 index 0000000..b31c203 --- /dev/null +++ b/src/core/core.h @@ -0,0 +1,74 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef CORE_H_ +#define CORE_H_ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "branch_predictor.h" +#include "exec_unit.h" +#include "instfetch.h" +#include "interconnect.h" +#include "loadstore.h" +#include "logic.h" +#include "mmu.h" +#include "parameter.h" +#include "renaming_unit.h" +#include "sharedcache.h" + +class Core : public 
Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate, executionTime; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + InstFetchU *ifu; + LoadStoreU *lsu; + MemManU *mmu; + EXECU *exu; + RENAMINGU *rnu; + Pipeline *corepipe; + UndiffCore *undiffCore; + SharedCache *l2cache; + CoreDynParam coredynp; + // full_decoder inst_decoder; + // clock_network clockNetwork; + Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_); + void set_core_param(); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~Core(); +}; + +#endif /* CORE_H_ */ diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc new file mode 100644 index 0000000..3d9a219 --- /dev/null +++ b/src/core/exec_unit.cc @@ -0,0 +1,670 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "exec_unit.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +EXECU::EXECU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), fp_u(0), + exeu(0), mul(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), + intTag_mul_Bypass(0), fp_bypass(0), fpTagBypass(0), exist(exist_) { + bool exist_flag = true; + if (!exist) + return; + double fu_height = 0.0; + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + rfu = new RegFU(XML, ithCore, &interface_ip, coredynp); + scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); + exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); + area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + + scheu->area.get_area()); + fu_height = 
exeu->FU_height; + if (coredynp.num_fpus > 0) { + fp_u = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, FPU); + area.set_area(area.get_area() + fp_u->area.get_area()); + } + if (coredynp.num_muls > 0) { + mul = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, MUL); + area.set_area(area.get_area() + mul->area.get_area()); + fu_height += mul->FU_height; + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; + * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. on the + * same bus there are multiple tri-state drivers and muxes that go to + * different components on the same bus + */ + if (XML->sys.Embedded) { + interface_ip.wt = Global_30; + interface_ip.wire_is_mat_type = 0; + interface_ip.wire_os_mat_type = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + } else { + interface_ip.wt = Global; + interface_ip.wire_is_mat_type = + 2; // start from semi-global since local wires are already used + interface_ip.wire_os_mat_type = 2; + interface_ip.throughput = 10.0 / clockRate; // Do not care + interface_ip.latency = 10.0 / clockRate; + } + + if (coredynp.core_ty == Inorder) { + int_bypass = + new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32), + rfu->int_regfile_height + exeu->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); + intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->int_regfile_height + exeu->FU_height + + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + + if (coredynp.num_muls > 0) { + int_mul_bypass = + new interconnect("Mul Bypass Data", + Core_device, + 1, + 
1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + intTag_mul_Bypass = + new interconnect("Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->fp_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + + if (coredynp.num_fpus > 0) { + fp_bypass = + new interconnect("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + fp_u->FU_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); + fpTagBypass = new interconnect("FP Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->fp_regfile_height + fp_u->FU_height + + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } else { // OOO + if (coredynp.scheu_ty == PhysicalRegFile) { + /* For physical register based OOO, + * data broadcast interconnects cover across functional units, lsq, inst + * windows and register files, while tag broadcast interconnects also + * cover across ROB + */ + int_bypass = new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_bypass->area.get_area()); + 
intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + + if (coredynp.num_muls > 0) { + int_mul_bypass = + new interconnect("Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTag_mul_Bypass = new interconnect( + "Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + + if (coredynp.num_fpus > 0) { + fp_bypass = new interconnect("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u->FU_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + fpTagBypass = new interconnect( + "FP Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fp_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } else { + /* + * In RS based processor both data and tag are broadcast together, + * covering functional units, 
lsq, inst windows, register files, and ROBs + */ + int_bypass = new interconnect("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + + lsq_height + scheu->Iw_height + + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTagBypass = new interconnect("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + + exeu->FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + intTagBypass->area.get_area()); + if (coredynp.num_muls > 0) { + int_mul_bypass = new interconnect( + "Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTag_mul_Bypass = new interconnect( + "Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + int_mul_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + + if (coredynp.num_fpus > 0) { + fp_bypass = new interconnect("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u->FU_height + + lsq_height + scheu->fp_Iw_height + + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + fpTagBypass = new interconnect( + "FP Bypass 
tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + + fp_bypass->area.get_area()); + bypass.area.set_area(bypass.area.get_area() + + fpTagBypass->area.get_area()); + } + } + } + area.set_area(area.get_area() + bypass.area.get_area()); +} + +void EXECU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double pppm_t[4] = {1, 1, 1, 1}; + // rfu->power.reset(); + // rfu->rt_power.reset(); + // scheu->power.reset(); + // scheu->rt_power.reset(); + // exeu->power.reset(); + // exeu->rt_power.reset(); + + rfu->computeEnergy(is_tdp); + scheu->computeEnergy(is_tdp); + exeu->computeEnergy(is_tdp); + if (coredynp.num_fpus > 0) { + fp_u->computeEnergy(is_tdp); + } + if (coredynp.num_muls > 0) { + mul->computeEnergy(is_tdp); + } + + if (is_tdp) { + set_pppm( + pppm_t, + 2 * coredynp.ALU_cdb_duty_cycle, + 2, + 2, + 2 * coredynp + .ALU_cdb_duty_cycle); // 2 means two source operands needs to be + // passed for each int instruction. + bypass.power = bypass.power + intTagBypass->power * pppm_t + + int_bypass->power * pppm_t; + if (coredynp.num_muls > 0) { + set_pppm( + pppm_t, + 2 * coredynp.MUL_cdb_duty_cycle, + 2, + 2, + 2 * coredynp + .MUL_cdb_duty_cycle); // 2 means two source operands needs to + // be passed for each int instruction. + bypass.power = bypass.power + intTag_mul_Bypass->power * pppm_t + + int_mul_bypass->power * pppm_t; + power = power + mul->power; + } + if (coredynp.num_fpus > 0) { + set_pppm( + pppm_t, + 3 * coredynp.FPU_cdb_duty_cycle, + 3, + 3, + 3 * coredynp + .FPU_cdb_duty_cycle); // 3 means three source operands needs + // to be passed for each fp instruction. 
+ bypass.power = bypass.power + fp_bypass->power * pppm_t + + fpTagBypass->power * pppm_t; + power = power + fp_u->power; + } + + power = power + rfu->power + exeu->power + bypass.power + scheu->power; + } else { + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_alu_accesses, + 2, + 2, + XML->sys.core[ithCore].cdb_alu_accesses); + bypass.rt_power = bypass.rt_power + intTagBypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + int_bypass->power * pppm_t; + + if (coredynp.num_muls > 0) { + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_mul_accesses, + 2, + 2, + XML->sys.core[ithCore] + .cdb_mul_accesses); // 2 means two source operands needs to + // be passed for each int instruction. + bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power * pppm_t + + int_mul_bypass->power * pppm_t; + rt_power = rt_power + mul->rt_power; + } + + if (coredynp.num_fpus > 0) { + set_pppm(pppm_t, + XML->sys.core[ithCore].cdb_fpu_accesses, + 3, + 3, + XML->sys.core[ithCore].cdb_fpu_accesses); + bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; + rt_power = rt_power + fp_u->rt_power; + } + rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + + scheu->rt_power; + } +} + +void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + // cout << indent_str_next << "Results Broadcast Bus Area = " << + // bypass->area.get_area() *1e-6 << " mm^2" << endl; + if (is_tdp) { + cout << indent_str << "Register Files:" << endl; + cout << indent_str_next << "Area = " << rfu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << rfu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << 
(long_channel ? rfu->power.readOp.longer_channel_leakage + : rfu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? rfu->power.readOp.power_gated_with_long_channel_leakage + : rfu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (plevel > 3) { + rfu->displayEnergy(indent + 4, is_tdp); + } + cout << indent_str << "Instruction Scheduler:" << endl; + cout << indent_str_next << "Area = " << scheu->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << scheu->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? scheu->power.readOp.longer_channel_leakage + : scheu->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
scheu->power.readOp.power_gated_with_long_channel_leakage + : scheu->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << scheu->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 3) { + scheu->displayEnergy(indent + 4, is_tdp); + } + exeu->displayEnergy(indent, is_tdp); + if (coredynp.num_fpus > 0) { + fp_u->displayEnergy(indent, is_tdp); + } + if (coredynp.num_muls > 0) { + mul->displayEnergy(indent, is_tdp); + } + cout << indent_str << "Results Broadcast Bus:" << endl; + cout << indent_str_next + << "Area Overhead = " << bypass.area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next + << "Peak Dynamic = " << bypass.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? bypass.power.readOp.longer_channel_leakage + : bypass.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
bypass.power.readOp.power_gated_with_long_channel_leakage + : bypass.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << bypass.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else { + cout << indent_str_next << "Register Files Peak Dynamic = " + << rfu->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Register Files Subthreshold Leakage = " + << rfu->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Register Files Gate Leakage = " + << rfu->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " + << scheu->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " + << scheu->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Instruction Sheduler Gate Leakage = " + << scheu->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " + << bypass.rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Subthreshold Leakage = " + << bypass.rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Results Broadcast Bus Gate Leakage = " + << bypass.rt_power.readOp.gate_leakage << " W" << endl; + } +} + +EXECU ::~EXECU() { + + if (!exist) + return; + if (int_bypass) { + delete int_bypass; + int_bypass = 0; + } + if (intTagBypass) { + delete intTagBypass; + intTagBypass = 0; + } + if (int_mul_bypass) { + delete int_mul_bypass; + int_mul_bypass = 0; + } + if (intTag_mul_Bypass) { + delete intTag_mul_Bypass; + intTag_mul_Bypass = 0; + } + if (fp_bypass) { + delete fp_bypass; + fp_bypass = 0; + } + if (fpTagBypass) { + delete fpTagBypass; + fpTagBypass = 0; + } + if (fp_u) 
{ + delete fp_u; + fp_u = 0; + } + if (exeu) { + delete exeu; + exeu = 0; + } + if (mul) { + delete mul; + mul = 0; + } + if (rfu) { + delete rfu; + rfu = 0; + } + if (scheu) { + delete scheu; + scheu = 0; + } +} diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h new file mode 100644 index 0000000..0199d7d --- /dev/null +++ b/src/core/exec_unit.h @@ -0,0 +1,82 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __EXEC_U_H__ +#define __EXEC_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" +#include "regfile.h" +#include "scheduler.h" + +class EXECU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + double lsq_height; + CoreDynParam coredynp; + RegFU *rfu; + SchedulerU *scheu; + FunctionalUnit *fp_u; + FunctionalUnit *exeu; + FunctionalUnit *mul; + interconnect *int_bypass; + interconnect *intTagBypass; + interconnect *int_mul_bypass; + interconnect *intTag_mul_Bypass; + interconnect *fp_bypass; + interconnect *fpTagBypass; + + Component bypass; + bool exist; + + EXECU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~EXECU(); +}; + +#endif // __EXEC_U_H__ diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc new file mode 100644 index 0000000..c02c65d --- /dev/null +++ b/src/core/instfetch.cc @@ -0,0 +1,820 @@ 
+/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "instfetch.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +InstFetchU::InstFetchU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), + coredynp(dyn_p_), IB(0), BTB(0), ID_inst(0), ID_operand(0), ID_misc(0), + exist(exist_) { + if (!exist) + return; + int idx, tag, data, size, line, assoc, banks; + bool debug = false, is_default = true; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; + // Assuming all L1 caches are virtually idxed physically tagged. + // cache + + size = (int)XML->sys.core[ithCore].icache.icache_config[0]; + line = (int)XML->sys.core[ithCore].icache.icache_config[1]; + assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; + banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + tag = debug ? 51 + : (int)XML->sys.physical_address_width - idx - + int(ceil(log2(line))) + EXTRA_TAG_BITS; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + debug ? 
32768 : (int)XML->sys.core[ithCore].icache.icache_config[0]; + interface_ip.line_sz = + debug ? 64 : (int)XML->sys.core[ithCore].icache.icache_config[1]; + interface_ip.assoc = + debug ? 8 : (int)XML->sys.core[ithCore].icache.icache_config[2]; + interface_ip.nbanks = + debug ? 1 : (int)XML->sys.core[ithCore].icache.icache_config[3]; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].icache.icache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 3.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + // interface_ip.obj_func_dyn_energy = 0; + // interface_ip.obj_func_dyn_power = 0; + // interface_ip.obj_func_leak_power = 0; + // interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + icache.caches = new ArrayST(&interface_ip, + "icache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + scktRatio = g_tp.sckt_co_eff; + chip_PR_overhead = g_tp.chip_layout_overhead; + macro_PR_overhead = g_tp.macro_layout_overhead; + icache.area.set_area(icache.area.get_area() + + icache.caches->local_result.area); + area.set_area(area.get_area() + icache.caches->local_result.area); + // output_data_csv(icache.caches.local_result); + + /* + *iCache controllers + *miss buffer Each MSHR contains enough state + *to handle one or more accesses of any type to a single memory line. 
+ *Due to the generality of the MSHR mechanism, + *the amount of state involved is non-trivial: + *including the address, pointers to the cache entry and destination register, + *written data, and various other pieces of state. + */ + interface_ip.num_search_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + + icache.caches->l_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].icache.buffer_sizes[0] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / + clockRate; // means cycle time + interface_ip.latency = debug + ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / + clockRate; // means access time + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.missb = new ArrayST(&interface_ip, + "icacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + + icache.missb->local_result.area); + area.set_area(area.get_area() + icache.missb->local_result.area); + // output_data_csv(icache.missb.local_result); + + // fill buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = icache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = data * XML->sys.core[ithCore].icache.buffer_sizes[1]; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.ifb = new ArrayST(&interface_ip, + "icacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); + area.set_area(area.get_area() + icache.ifb->local_result.area); + // output_data_csv(icache.ifb.local_result); + + // prefetch buffer + tag = XML->sys.physical_address_width + + EXTRA_TAG_BITS; // check with previous entries to decide wthether to + // merge. + data = icache.caches->l_ip + .line_sz; // separate queue to prevent from cache polution. + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = + XML->sys.core[ithCore].icache.buffer_sizes[2] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + XML->sys.core[ithCore].number_instruction_fetch_ports; + icache.prefetchb = new ArrayST(&interface_ip, + "icacheprefetchBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.area.set_area(icache.area.get_area() + + icache.prefetchb->local_result.area); + area.set_area(area.get_area() + icache.prefetchb->local_result.area); + // output_data_csv(icache.prefetchb.local_result); + + // Instruction buffer + data = + XML->sys.core[ithCore].instruction_length * + XML->sys.core[ithCore] + .peak_issue_width; // icache.caches.l_ip.line_sz; //multiple + // threads timing sharing the instruction buffer. + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.pure_cam = false; + interface_ip.line_sz = int(ceil(data / 8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].number_hardware_threads * + XML->sys.core[ithCore].instruction_buffer_size * + interface_ip.line_sz > + 64 + ? XML->sys.core[ithCore].number_hardware_threads * + XML->sys.core[ithCore].instruction_buffer_size * + interface_ip.line_sz + : 64; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + // NOTE: Assuming IB is time slice shared among threads, every fetch op will + // at least fetch "fetch width" instructions. + interface_ip.num_rw_ports = + debug + ? 
1 + : XML->sys.core[ithCore] + .number_instruction_fetch_ports; // XML->sys.core[ithCore].fetch_width; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + IB = new ArrayST(&interface_ip, + "InstBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + IB->area.set_area(IB->area.get_area() + IB->local_result.area); + area.set_area(area.get_area() + IB->local_result.area); + // output_data_csv(IB.IB.local_result); + + // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; + // inst_decoder.init_decoder(is_default, &interface_ip); + // inst_decoder.full_decoder_power(); + + if (coredynp.predictionW > 0) { + /* + * BTB branch target buffer, accessed during IF stage. Virtually indexed and + * virtually tagged It is only a cache without all the buffers in the cache + * controller since it is more like a look up table than a cache with cache + * controller. When access miss, no load from other places such as main + * memory (not actively fill the misses), it is passively updated under two + * circumstances: 1) when BPT@ID stage finds out current is a taken branch + * while BTB missed 2) When BPT@ID stage predicts differently than BTB 3) + * When ID stage finds out current instruction is not a branch while BTB had + * a hit.(mark as invalid) 4) when EXEU find out wrong target has been + * provided from BTB. + * + */ + size = XML->sys.core[ithCore].BTB.BTB_config[0]; + line = XML->sys.core[ithCore].BTB.BTB_config[1]; + assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; + banks = XML->sys.core[ithCore].BTB.BTB_config[3]; + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + // tag = + // debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + + // int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + // +EXTRA_TAG_BITS; + tag = debug ? 
51 + : XML->sys.virtual_address_width + + int(ceil(log2( + XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = debug ? 32768 : size; + interface_ip.line_sz = debug ? 64 : line; + interface_ip.assoc = debug ? 8 : assoc; + interface_ip.nbanks = debug ? 1 : banks; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].BTB.BTB_config[4] / clockRate; + interface_ip.latency = + debug ? 3.0 / clockRate + : XML->sys.core[ithCore].BTB.BTB_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = coredynp.predictionW; + interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.num_se_rd_ports = 0; + BTB = new ArrayST(&interface_ip, + "Branch Target Buffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); + area.set_area(area.get_area() + BTB->local_result.area); + /// cout<<"area="<area.get_area()); + } + + ID_inst = new inst_decoder(is_default, + &interface_ip, + coredynp.opcode_length, + 1 /*Decoder should not know how many by itself*/, + coredynp.x86, + Core_device, + coredynp.core_ty); + + ID_operand = new inst_decoder(is_default, + &interface_ip, + coredynp.arch_ireg_width, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); + + ID_misc = new inst_decoder(is_default, + &interface_ip, + 8 /* Prefix field etc upto 14B*/, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); + // TODO: X86 decoder should decode the inst in cyclic mode 
under the control + // of squencer. So the dynamic power should be multiplied by a few times. + area.set_area(area.get_area() + + (ID_inst->area.get_area() + ID_operand->area.get_area() + + ID_misc->area.get_area()) * + coredynp.decodeW); +} + +void InstFetchU::computeEnergy(bool is_tdp) { + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + icache.caches->stats_t.readAc.access = + icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; + icache.caches->stats_t.readAc.miss = 0; + icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - + icache.caches->stats_t.readAc.miss; + icache.caches->tdp_stats = icache.caches->stats_t; + + icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = + icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = + icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb->tdp_stats = icache.missb->stats_t; + + icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = + icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = + icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb->tdp_stats = icache.ifb->stats_t; + + icache.prefetchb->stats_t.readAc.access = + icache.prefetchb->stats_t.readAc.hit = + icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = + icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; + + IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + XML->sys.core[ithCore].peak_issue_width; + IB->tdp_stats = IB->stats_t; + + if (coredynp.predictionW > 0) { + BTB->stats_t.readAc.access = + coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; + BTB->stats_t.writeAc.access = + 0; // 
XML->sys.core[ithCore].BTB.write_accesses; + } + + ID_inst->stats_t.readAc.access = coredynp.decodeW; + ID_operand->stats_t.readAc.access = coredynp.decodeW; + ID_misc->stats_t.readAc.access = coredynp.decodeW; + ID_inst->tdp_stats = ID_inst->stats_t; + ID_operand->tdp_stats = ID_operand->stats_t; + ID_misc->tdp_stats = ID_misc->stats_t; + + } else { + // init stats for Runtime Dynamic (RTP) + icache.caches->stats_t.readAc.access = + XML->sys.core[ithCore].icache.read_accesses; + icache.caches->stats_t.readAc.miss = + XML->sys.core[ithCore].icache.read_misses; + icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - + icache.caches->stats_t.readAc.miss; + icache.caches->rtp_stats = icache.caches->stats_t; + + icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; + icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; + icache.missb->rtp_stats = icache.missb->stats_t; + + icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; + icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; + icache.ifb->rtp_stats = icache.ifb->stats_t; + + icache.prefetchb->stats_t.readAc.access = + icache.caches->stats_t.readAc.miss; + icache.prefetchb->stats_t.writeAc.access = + icache.caches->stats_t.readAc.miss; + icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; + + IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + XML->sys.core[ithCore].total_instructions; + IB->rtp_stats = IB->stats_t; + + if (coredynp.predictionW > 0) { + BTB->stats_t.readAc.access = + XML->sys.core[ithCore] + .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; + BTB->stats_t.writeAc.access = + XML->sys.core[ithCore] + .BTB + .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; + BTB->rtp_stats = BTB->stats_t; + } + + ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_operand->stats_t.readAc.access = + XML->sys.core[ithCore].total_instructions; + 
ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_inst->rtp_stats = ID_inst->stats_t; + ID_operand->rtp_stats = ID_operand->stats_t; + ID_misc->rtp_stats = ID_misc->stats_t; + } + + icache.power_t.reset(); + IB->power_t.reset(); + // ID_inst->power_t.reset(); + // ID_operand->power_t.reset(); + // ID_misc->power_t.reset(); + if (coredynp.predictionW > 0) { + BTB->power_t.reset(); + } + + icache.power_t.readOp.dynamic += + (icache.caches->stats_t.readAc.hit * + icache.caches->local_result.power.readOp.dynamic + + // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ + icache.caches->stats_t.readAc.miss * + icache.caches->local_result.power.readOp + .dynamic + // assume tag data accessed in parallel + icache.caches->stats_t.readAc.miss * + icache.caches->local_result.power.writeOp + .dynamic); // read miss in Icache cause a write to Icache + icache.power_t.readOp.dynamic += + icache.missb->stats_t.readAc.access * + icache.missb->local_result.power.searchOp.dynamic + + icache.missb->stats_t.writeAc.access * + icache.missb->local_result.power.writeOp + .dynamic; // each access to missb involves a CAM and a write + icache.power_t.readOp.dynamic += + icache.ifb->stats_t.readAc.access * + icache.ifb->local_result.power.searchOp.dynamic + + icache.ifb->stats_t.writeAc.access * + icache.ifb->local_result.power.writeOp.dynamic; + icache.power_t.readOp.dynamic += + icache.prefetchb->stats_t.readAc.access * + icache.prefetchb->local_result.power.searchOp.dynamic + + icache.prefetchb->stats_t.writeAc.access * + icache.prefetchb->local_result.power.writeOp.dynamic; + + IB->power_t.readOp.dynamic += + IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + + IB->stats_t.writeAc.access * IB->local_result.power.writeOp.dynamic; + + if (coredynp.predictionW > 0) { + BTB->power_t.readOp.dynamic += + BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + + 
BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; + + BPT->computeEnergy(is_tdp); + } + + if (is_tdp) { + // icache.power = icache.power_t + + // (icache.caches->local_result.power)*pppm_lkg + + // (icache.missb->local_result.power + + // icache.ifb->local_result.power + + // icache.prefetchb->local_result.power)*pppm_Isub; + icache.power = icache.power_t + (icache.caches->local_result.power + + icache.missb->local_result.power + + icache.ifb->local_result.power + + icache.prefetchb->local_result.power) * + pppm_lkg; + + IB->power = IB->power_t + IB->local_result.power * pppm_lkg; + power = power + icache.power + IB->power; + if (coredynp.predictionW > 0) { + BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; + power = power + BTB->power + BPT->power; + } + + ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; + ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; + ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; + + ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; + ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; + ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; + + power = power + (ID_inst->power + ID_operand->power + ID_misc->power); + } else { + // icache.rt_power = icache.power_t + + // (icache.caches->local_result.power)*pppm_lkg + + // (icache.missb->local_result.power + + // icache.ifb->local_result.power + + // icache.prefetchb->local_result.power)*pppm_Isub; + + icache.rt_power = icache.power_t + (icache.caches->local_result.power + + icache.missb->local_result.power + + icache.ifb->local_result.power + + icache.prefetchb->local_result.power) * + pppm_lkg; + + IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; + rt_power = rt_power + icache.rt_power + IB->rt_power; + if (coredynp.predictionW > 0) { + BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; + rt_power = rt_power + 
BTB->rt_power + BPT->rt_power; + } + + ID_inst->rt_power.readOp.dynamic = + ID_inst->power_t.readOp.dynamic * ID_inst->rtp_stats.readAc.access; + ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * + ID_operand->rtp_stats.readAc.access; + ID_misc->rt_power.readOp.dynamic = + ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; + + rt_power = rt_power + + (ID_inst->rt_power + ID_operand->rt_power + ID_misc->rt_power); + } +} + +void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + + cout << indent_str << "Instruction Cache:" << endl; + cout << indent_str_next << "Area = " << icache.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << icache.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? icache.power.readOp.longer_channel_leakage + : icache.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
icache.power.readOp.power_gated_with_long_channel_leakage + : icache.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << icache.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (coredynp.predictionW > 0) { + cout << indent_str << "Branch Target Buffer:" << endl; + cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? BTB->power.readOp.longer_channel_leakage + : BTB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? BTB->power.readOp.power_gated_with_long_channel_leakage + : BTB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (BPT->exist) { + cout << indent_str << "Branch Predictor:" << endl; + cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? BPT->power.readOp.longer_channel_leakage + : BPT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
BPT->power.readOp.power_gated_with_long_channel_leakage + : BPT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (plevel > 3) { + BPT->displayEnergy(indent + 4, plevel, is_tdp); + } + } + } + cout << indent_str << "Instruction Buffer:" << endl; + cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? IB->power.readOp.longer_channel_leakage + : IB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? IB->power.readOp.power_gated_with_long_channel_leakage + : IB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + cout << indent_str << "Instruction Decoder:" << endl; + cout << indent_str_next << "Area = " + << (ID_inst->area.get_area() + ID_operand->area.get_area() + + ID_misc->area.get_area()) * + coredynp.decodeW * 1e-6 + << " mm^2" << endl; + cout << indent_str_next << "Peak Dynamic = " + << (ID_inst->power.readOp.dynamic + ID_operand->power.readOp.dynamic + + ID_misc->power.readOp.dynamic) * + clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
(ID_inst->power.readOp.longer_channel_leakage + + ID_operand->power.readOp.longer_channel_leakage + + ID_misc->power.readOp.longer_channel_leakage) + : (ID_inst->power.readOp.leakage + + ID_operand->power.readOp.leakage + + ID_misc->power.readOp.leakage)) + << " W" << endl; + + double tot_leakage = + (ID_inst->power.readOp.leakage + ID_operand->power.readOp.leakage + + ID_misc->power.readOp.leakage); + double tot_leakage_longchannel = + (ID_inst->power.readOp.longer_channel_leakage + + ID_operand->power.readOp.longer_channel_leakage + + ID_misc->power.readOp.longer_channel_leakage); + double tot_leakage_pg = (ID_inst->power.readOp.power_gated_leakage + + ID_operand->power.readOp.power_gated_leakage + + ID_misc->power.readOp.power_gated_leakage); + double tot_leakage_pg_with_long_channel = + (ID_inst->power.readOp.power_gated_with_long_channel_leakage + + ID_operand->power.readOp.power_gated_with_long_channel_leakage + + ID_misc->power.readOp.power_gated_with_long_channel_leakage); + + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
tot_leakage_pg_with_long_channel : tot_leakage_pg) + << " W" << endl; + cout << indent_str_next << "Gate Leakage = " + << (ID_inst->power.readOp.gate_leakage + + ID_operand->power.readOp.gate_leakage + + ID_misc->power.readOp.gate_leakage) + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << (ID_inst->rt_power.readOp.dynamic + + ID_operand->rt_power.readOp.dynamic + + ID_misc->rt_power.readOp.dynamic) / + executionTime + << " W" << endl; + cout << endl; + } else { + // cout << indent_str_next << "Instruction Cache Peak Dynamic = " + //<< icache.rt_power.readOp.dynamic*clockRate << " W" << endl; + // cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " + // << icache.rt_power.readOp.leakage <<" W" << endl; cout << + // indent_str_next << "Instruction Cache Gate Leakage = " << + // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Instruction Buffer Peak Dynamic = " << + // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << + // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << + // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next + // << "Instruction Buffer Gate Leakage = " << + // IB->rt_power.readOp.gate_leakage + //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer + // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << + // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold + // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout + // << indent_str_next << "Branch Target Buffer Gate Leakage = " << + // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << + // indent_str_next << "Branch Predictor Peak Dynamic = " << + // BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << + // BPT->rt_power.readOp.leakage << " W" << endl; cout << + // indent_str_next + // << "Branch Predictor Gate Leakage = 
" << + // BPT->rt_power.readOp.gate_leakage + //<< " W" << endl; + } +} + +InstFetchU ::~InstFetchU() { + + if (!exist) + return; + if (IB) { + delete IB; + IB = 0; + } + if (ID_inst) { + delete ID_inst; + ID_inst = 0; + } + if (ID_operand) { + delete ID_operand; + ID_operand = 0; + } + if (ID_misc) { + delete ID_misc; + ID_misc = 0; + } + if (coredynp.predictionW > 0) { + if (BTB) { + delete BTB; + BTB = 0; + } + if (BPT) { + delete BPT; + BPT = 0; + } + } +} diff --git a/src/core/instfetch.h b/src/core/instfetch.h new file mode 100644 index 0000000..e32ff76 --- /dev/null +++ b/src/core/instfetch.h @@ -0,0 +1,75 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __INST_FETCH_U_H__ +#define __INST_FETCH_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "branch_predictor.h" +#include "instcache.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class InstFetchU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + enum Cache_policy cache_p; + InstCache icache; + ArrayST *IB; + ArrayST *BTB; + BranchPredictor *BPT; + inst_decoder *ID_inst; + inst_decoder *ID_operand; + inst_decoder *ID_misc; + bool exist; + + InstFetchU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exsit = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~InstFetchU(); +}; + +#endif // __INST_FETCH_U_H__ diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc new file mode 100644 index 0000000..a961130 --- /dev/null +++ b/src/core/loadstore.cc @@ -0,0 +1,747 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, 
L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "loadstore.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +LoadStoreU::LoadStoreU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), 
interface_ip(*interface_ip_), + coredynp(dyn_p_), LSQ(0), LoadQ(0), exist(exist_) { + if (!exist) + return; + int idx, tag, data, size, line, assoc, banks; + bool debug = false; + int ldst_opcode = XML->sys.core[ithCore].opcode_width; // 16; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; + + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + // Dcache + size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; + line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; + assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; + banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; + idx = debug ? 9 : int(ceil(log2(size / line / assoc))); + tag = debug ? 51 + : XML->sys.physical_address_width - idx - int(ceil(log2(line))) + + EXTRA_TAG_BITS; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + debug ? 32768 : (int)XML->sys.core[ithCore].dcache.dcache_config[0]; + interface_ip.line_sz = + debug ? 64 : (int)XML->sys.core[ithCore].dcache.dcache_config[1]; + interface_ip.assoc = + debug ? 8 : (int)XML->sys.core[ithCore].dcache.dcache_config[2]; + interface_ip.nbanks = + debug ? 1 : (int)XML->sys.core[ithCore].dcache.dcache_config[3]; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = + 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 
3.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.is_cache = true; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + debug + ? 1 + : XML->sys.core[ithCore] + .memory_ports; // usually In-order has 1 and OOO has 2 at least. + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.caches = new ArrayST(&interface_ip, + "dcache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.caches->local_result.area); + area.set_area(area.get_area() + dcache.caches->local_result.area); + // output_data_csv(dcache.caches.local_result); + + // dCache controllers + // miss buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + + dcache.caches->l_ip.line_sz * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[0] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 
1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.missb = new ArrayST(&interface_ip, + "dcacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.missb->local_result.area); + area.set_area(area.get_area() + dcache.missb->local_result.area); + // output_data_csv(dcache.missb.local_result); + + // fill buffer + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = dcache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = data * XML->sys.core[ithCore].dcache.buffer_sizes[1]; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.ifb = new ArrayST(&interface_ip, + "dcacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + dcache.ifb->local_result.area); + area.set_area(area.get_area() + dcache.ifb->local_result.area); + // output_data_csv(dcache.ifb.local_result); + + // prefetch buffer + tag = XML->sys.physical_address_width + + EXTRA_TAG_BITS; // check with previous entries to decide wthether to + // merge. 
+ data = dcache.caches->l_ip + .line_sz; // separate queue to prevent from cache polution. + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[2] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = debug ? 1 : XML->sys.core[ithCore].memory_ports; + ; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.prefetchb = new ArrayST(&interface_ip, + "dcacheprefetchBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.prefetchb->local_result.area); + area.set_area(area.get_area() + dcache.prefetchb->local_result.area); + // output_data_csv(dcache.prefetchb.local_result); + + // WBB + + if (cache_p == Write_back) { + tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; + data = dcache.caches->l_ip.line_sz; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = data; + interface_ip.cache_sz = + XML->sys.core[ithCore].dcache.buffer_sizes[3] * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 
1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + dcache.wbb = new ArrayST(&interface_ip, + "dcacheWBB", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.area.set_area(dcache.area.get_area() + + dcache.wbb->local_result.area); + area.set_area(area.get_area() + dcache.wbb->local_result.area); + // output_data_csv(dcache.wbb.local_result); + } + + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both + * loadQ and StoreQ + */ + tag = ldst_opcode + XML->sys.virtual_address_width + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.machine_bits; + interface_ip.is_cache = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + XML->sys.core[ithCore].store_buffer_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; 
+ LSQ = new ArrayST(&interface_ip, + "Load(Store)Queue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + LSQ->area.set_area(LSQ->area.get_area() + LSQ->local_result.area); + area.set_area(area.get_area() + LSQ->local_result.area); + // output_data_csv(LSQ.LSQ.local_result); + lsq_height = + LSQ->local_result.cache_ht * + sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.cache_sz = + XML->sys.core[ithCore].load_buffer_size * interface_ip.line_sz; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + LoadQ = new ArrayST(&interface_ip, + "LoadQueue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); + area.set_area(area.get_area() + LoadQ->local_result.area); + // output_data_csv(LoadQ.LoadQ.local_result); + lsq_height = + (LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht) * + sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + } + area.set_area(area.get_area() * cdb_overhead); +} + +void LoadStoreU::computeEnergy(bool is_tdp) { + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + 
dcache.caches->stats_t.readAc.access = + 0.67 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches->stats_t.readAc.miss = 0; + dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - + dcache.caches->stats_t.readAc.miss; + dcache.caches->stats_t.writeAc.access = + 0.33 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches->stats_t.writeAc.miss = 0; + dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - + dcache.caches->stats_t.writeAc.miss; + dcache.caches->tdp_stats = dcache.caches->stats_t; + + dcache.missb->stats_t.readAc.access = + dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb->stats_t.writeAc.access = + dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb->tdp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = + dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb->stats_t.writeAc.access = + dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb->tdp_stats = dcache.ifb->stats_t; + + // Prefetch buffer: read and write counts both come from the prefetch + // buffer's own search ports (the write count previously read the fill + // buffer's port count). + dcache.prefetchb->stats_t.readAc.access = + dcache.prefetchb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb->stats_t.writeAc.access = + dcache.prefetchb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; + if (cache_p == Write_back) { + // NOTE(review): unlike the buffers above, the WBB counts are not scaled + // by LSU_duty_cycle -- confirm this is intended. + dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; + dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; + dcache.wbb->tdp_stats = dcache.wbb->stats_t; + } + + LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = + LSQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LSQ->tdp_stats = LSQ->stats_t; + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = + LoadQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LoadQ->tdp_stats =
LoadQ->stats_t; + } + } else { + // init stats for Runtime Dynamic (RTP) + dcache.caches->stats_t.readAc.access = + XML->sys.core[ithCore].dcache.read_accesses; + dcache.caches->stats_t.readAc.miss = + XML->sys.core[ithCore].dcache.read_misses; + dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - + dcache.caches->stats_t.readAc.miss; + dcache.caches->stats_t.writeAc.access = + XML->sys.core[ithCore].dcache.write_accesses; + dcache.caches->stats_t.writeAc.miss = + XML->sys.core[ithCore].dcache.write_misses; + dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - + dcache.caches->stats_t.writeAc.miss; + dcache.caches->rtp_stats = dcache.caches->stats_t; + + if (cache_p == Write_back) { + dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.missb->stats_t.writeAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.missb->rtp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.ifb->rtp_stats = dcache.ifb->stats_t; + + dcache.prefetchb->stats_t.readAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.prefetchb->stats_t.writeAc.access = + dcache.caches->stats_t.writeAc.miss; + dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; + + dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; + dcache.wbb->rtp_stats = dcache.wbb->stats_t; + } else { + dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; + dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; + dcache.missb->rtp_stats = dcache.missb->stats_t; + + dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; + dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; + dcache.ifb->rtp_stats = dcache.ifb->stats_t; + + 
dcache.prefetchb->stats_t.readAc.access = + dcache.caches->stats_t.readAc.miss; + dcache.prefetchb->stats_t.writeAc.access = + dcache.caches->stats_t.readAc.miss; + dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; + } + + LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions) * + 2; // flush overhead considered + LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions) * + 2; + LSQ->rtp_stats = LSQ->stats_t; + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions; + LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions; + LoadQ->rtp_stats = LoadQ->stats_t; + } + } + + dcache.power_t.reset(); + LSQ->power_t.reset(); + dcache.power_t.readOp.dynamic += + (dcache.caches->stats_t.readAc.hit * + dcache.caches->local_result.power.readOp.dynamic + + dcache.caches->stats_t.readAc.miss * + dcache.caches->local_result.power.readOp + .dynamic + // assuming D cache is in the fast model which read + // tag and data together + dcache.caches->stats_t.writeAc.miss * + dcache.caches->local_result.tag_array2->power.readOp.dynamic + + dcache.caches->stats_t.writeAc.access * + dcache.caches->local_result.power.writeOp.dynamic); + + if (cache_p == Write_back) { // write miss will generate a write later + dcache.power_t.readOp.dynamic += + dcache.caches->stats_t.writeAc.miss * + dcache.caches->local_result.power.writeOp.dynamic; + } + + dcache.power_t.readOp.dynamic += + dcache.missb->stats_t.readAc.access * + dcache.missb->local_result.power.searchOp.dynamic + + dcache.missb->stats_t.writeAc.access * + dcache.missb->local_result.power.writeOp + .dynamic; // each access to missb involves a CAM and a write + dcache.power_t.readOp.dynamic += + 
dcache.ifb->stats_t.readAc.access * + dcache.ifb->local_result.power.searchOp.dynamic + + dcache.ifb->stats_t.writeAc.access * + dcache.ifb->local_result.power.writeOp.dynamic; + dcache.power_t.readOp.dynamic += + dcache.prefetchb->stats_t.readAc.access * + dcache.prefetchb->local_result.power.searchOp.dynamic + + dcache.prefetchb->stats_t.writeAc.access * + dcache.prefetchb->local_result.power.writeOp.dynamic; + if (cache_p == Write_back) { + dcache.power_t.readOp.dynamic += + dcache.wbb->stats_t.readAc.access * + dcache.wbb->local_result.power.searchOp.dynamic + + dcache.wbb->stats_t.writeAc.access * + dcache.wbb->local_result.power.writeOp.dynamic; + } + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->power_t.reset(); + LoadQ->power_t.readOp.dynamic += + LoadQ->stats_t.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->stats_t.writeAc.access * + LoadQ->local_result.power.writeOp + .dynamic; // every memory access involves at least two + // operations on LoadQ + + LSQ->power_t.readOp.dynamic += + LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->stats_t.writeAc.access * + LSQ->local_result.power.writeOp + .dynamic; // every memory access involves at least two + // operations on LSQ + + } else { + LSQ->power_t.readOp.dynamic += + LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->stats_t.writeAc.access * + LSQ->local_result.power.writeOp + .dynamic; // every memory access involves at least two + // operations on LSQ + } + + if (is_tdp) { + // dcache.power = dcache.power_t + + // (dcache.caches->local_result.power)*pppm_lkg + + // (dcache.missb->local_result.power + + // dcache.ifb->local_result.power + + // dcache.prefetchb->local_result.power + + // dcache.wbb->local_result.power)*pppm_Isub; + dcache.power
= dcache.power_t + (dcache.caches->local_result.power + + dcache.missb->local_result.power + + dcache.ifb->local_result.power + + dcache.prefetchb->local_result.power) * + pppm_lkg; + if (cache_p == Write_back) { + dcache.power = dcache.power + dcache.wbb->local_result.power * pppm_lkg; + } + + LSQ->power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; + power = power + dcache.power + LSQ->power; + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; + power = power + LoadQ->power; + } + } else { + // dcache.rt_power = dcache.power_t + + // (dcache.caches->local_result.power + + // dcache.missb->local_result.power + // + dcache.ifb->local_result.power + + // dcache.prefetchb->local_result.power + + // dcache.wbb->local_result.power)*pppm_lkg; + dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + + dcache.missb->local_result.power + + dcache.ifb->local_result.power + + dcache.prefetchb->local_result.power) * + pppm_lkg; + + if (cache_p == Write_back) { + dcache.rt_power = + dcache.rt_power + dcache.wbb->local_result.power * pppm_lkg; + } + + LSQ->rt_power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; + rt_power = rt_power + dcache.rt_power + LSQ->rt_power; + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; + rt_power = rt_power + LoadQ->rt_power; + } + } +} + +void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Data Cache:" << endl; + cout << indent_str_next << "Area = " << dcache.area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak 
Dynamic = " << dcache.power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? dcache.power.readOp.longer_channel_leakage + : dcache.power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? dcache.power.readOp.power_gated_with_long_channel_leakage + : dcache.power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << dcache.rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if (coredynp.core_ty == Inorder) { + cout << indent_str << "Load/Store Queue:" << endl; + cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LSQ->power.readOp.longer_channel_leakage + : LSQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
LSQ->power.readOp.power_gated_with_long_channel_leakage + : LSQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else + + { + if (XML->sys.core[ithCore].load_buffer_size > 0) { + cout << indent_str << "LoadQ:" << endl; + cout << indent_str_next << "Area = " << LoadQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LoadQ->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LoadQ->power.readOp.longer_channel_leakage + : LoadQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? LoadQ->power.readOp + .power_gated_with_long_channel_leakage + : LoadQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LoadQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + cout << indent_str << "StoreQ:" << endl; + cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? LSQ->power.readOp.longer_channel_leakage + : LSQ->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
LSQ->power.readOp.power_gated_with_long_channel_leakage + : LSQ->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else { + cout << indent_str_next << "Data Cache Peak Dynamic = " + << dcache.rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Data Cache Subthreshold Leakage = " + << dcache.rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Data Cache Gate Leakage = " + << dcache.rt_power.readOp.gate_leakage << " W" << endl; + if (coredynp.core_ty == Inorder) { + cout << indent_str_next << "Load/Store Queue Peak Dynamic = " + << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " + << LSQ->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Load/Store Queue Gate Leakage = " + << LSQ->rt_power.readOp.gate_leakage << " W" << endl; + } else { + // LoadQ is only allocated by the constructor for OOO cores with a + // non-zero load_buffer_size and is otherwise left null, so skip its + // report instead of dereferencing a null pointer. + if (LoadQ) { + cout << indent_str_next << "LoadQ Peak Dynamic = " + << LoadQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "LoadQ Subthreshold Leakage = " + << LoadQ->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage + << " W" << endl; + } + cout << indent_str_next << "StoreQ Peak Dynamic = " + << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage + << " W" << endl; + } + } +} + +LoadStoreU ::~LoadStoreU() { + + if (!exist) + return; + if (LSQ) { + delete LSQ; + LSQ = 0; + } + if (LoadQ) { + delete LoadQ; + LoadQ = 0; + } +} diff
--git a/src/core/loadstore.h b/src/core/loadstore.h new file mode 100644 index 0000000..500d32e --- /dev/null +++ b/src/core/loadstore.h @@ -0,0 +1,72 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __LOAD_STORE_U_H__ +#define __LOAD_STORE_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "datacache.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class LoadStoreU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + enum Cache_policy cache_p; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + double lsq_height; + DataCache dcache; + ArrayST *LSQ; // it is actually the store queue but for inorder processors it + // serves as both loadQ and StoreQ + ArrayST *LoadQ; + bool exist; + + LoadStoreU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~LoadStoreU(); +}; + +#endif // __LOAD_STORE_U_H__ diff --git a/src/core/mmu.cc b/src/core/mmu.cc new file mode 100644 index 0000000..776b6fb --- /dev/null +++ b/src/core/mmu.cc @@ -0,0 +1,287 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "mmu.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +MemManU::MemManU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), 
interface_ip(*interface_ip_), + coredynp(dyn_p_), itlb(0), dtlb(0), exist(exist_) { + if (!exist) + return; + int tag, data; + bool debug = false; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.specific_tag = 1; + // Itlb TLBs are partitioned among threads according to Niagara and Nehalem + tag = XML->sys.virtual_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))) + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.physical_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))); + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].itlb.number_entries * + interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; + interface_ip.latency = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = + debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + debug ?
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; + itlb = new ArrayST( + &interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); + itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area); + area.set_area(area.get_area() + itlb->local_result.area); + // output_data_csv(itlb.tlb.local_result); + + // dtlb + tag = XML->sys.virtual_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))) + + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + + EXTRA_TAG_BITS; + data = XML->sys.physical_address_width - + int(floor(log2(XML->sys.virtual_memory_page_size))); + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.line_sz = + int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); + interface_ip.cache_sz = + XML->sys.core[ithCore].dtlb.number_entries * + interface_ip.line_sz; //*XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 0; + interface_ip.throughput = + debug ? 1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[4] / clockRate; + interface_ip.latency = + debug ? 
1.0 / clockRate + : XML->sys.core[ithCore].dcache.dcache_config[5] / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; + dtlb = new ArrayST( + &interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); + dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area); + area.set_area(area.get_area() + dtlb->local_result.area); + // output_data_csv(dtlb.tlb.local_result); +} + +void MemManU::computeEnergy(bool is_tdp) { + + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + itlb->stats_t.readAc.access = + itlb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; + itlb->stats_t.readAc.miss = 0; + itlb->stats_t.readAc.hit = + itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; + itlb->tdp_stats = itlb->stats_t; + + dtlb->stats_t.readAc.access = + dtlb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dtlb->stats_t.readAc.miss = 0; + dtlb->stats_t.readAc.hit = + dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; + dtlb->tdp_stats = dtlb->stats_t; + } else { + // init stats for Runtime Dynamic (RTP) + itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; + itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; + itlb->stats_t.readAc.hit = + itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; + itlb->rtp_stats = itlb->stats_t; + + dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; + dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; + dtlb->stats_t.readAc.hit = + dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; + dtlb->rtp_stats = dtlb->stats_t; + } + + itlb->power_t.reset(); + 
dtlb->power_t.reset(); + itlb->power_t.readOp.dynamic += + itlb->stats_t.readAc.access * itlb->local_result.power.searchOp + .dynamic // FA spent most power in tag, + // so use total access not hits + + itlb->stats_t.readAc.miss * itlb->local_result.power.writeOp.dynamic; + dtlb->power_t.readOp.dynamic += + dtlb->stats_t.readAc.access * dtlb->local_result.power.searchOp + .dynamic // FA spent most power in tag, + // so use total access not hits + + dtlb->stats_t.readAc.miss * dtlb->local_result.power.writeOp.dynamic; + + if (is_tdp) { + itlb->power = itlb->power_t + itlb->local_result.power * pppm_lkg; + dtlb->power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; + power = power + itlb->power + dtlb->power; + } else { + itlb->rt_power = itlb->power_t + itlb->local_result.power * pppm_lkg; + dtlb->rt_power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; + rt_power = rt_power + itlb->rt_power + dtlb->rt_power; + } +} + +void MemManU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Itlb:" << endl; + cout << indent_str_next << "Area = " << itlb->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << itlb->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? itlb->power.readOp.longer_channel_leakage + : itlb->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
itlb->power.readOp.power_gated_with_long_channel_leakage + : itlb->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << itlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Dtlb:" << endl; + cout << indent_str_next << "Area = " << dtlb->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << dtlb->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? dtlb->power.readOp.longer_channel_leakage + : dtlb->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? dtlb->power.readOp.power_gated_with_long_channel_leakage + : dtlb->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << dtlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } else { + cout << indent_str_next << "Itlb Peak Dynamic = " + << itlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Dtlb Peak Dynamic = " + << dtlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage + << " W" << endl; + } +} + +MemManU ::~MemManU() { + + if (!exist) + return; + if 
(itlb) { + delete itlb; + itlb = 0; + } + if (dtlb) { + delete dtlb; + dtlb = 0; + } +} diff --git a/src/core/mmu.h b/src/core/mmu.h new file mode 100644 index 0000000..19e6312 --- /dev/null +++ b/src/core/mmu.h @@ -0,0 +1,67 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __MEMORY_MANAGEMENT_U_H__ +#define __MEMORY_MANAGEMENT_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class MemManU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + ArrayST *itlb; + ArrayST *dtlb; + bool exist; + + MemManU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~MemManU(); +}; + +#endif // __MEMORY_MANAGEMENT_U_H__ diff --git a/src/core/regfile.cc b/src/core/regfile.cc new file mode 100644 index 0000000..bd77879 --- /dev/null +++ b/src/core/regfile.cc @@ -0,0 +1,439 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "regfile.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +RegFU::RegFU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), 
interface_ip(*interface_ip_), + coredynp(dyn_p_), IRF(0), FRF(0), RFWIN(0), exist(exist_) { + /* + * processors have separate architectural register files for each thread. + * therefore, the bypass buses need to travel across all the register files. + */ + + if (!exist) + return; + int data; + + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + //**********************************IRF*************************************** + data = coredynp.int_data_width; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.cache_sz = coredynp.num_IRF_entry * interface_ip.line_sz; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. + interface_ip.num_rd_ports = 2 * coredynp.peak_issueW; + interface_ip.num_wr_ports = coredynp.peak_issueW; + interface_ip.num_se_rd_ports = 0; + IRF = new ArrayST(&interface_ip, + "Integer Register File", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + IRF->area.set_area(IRF->area.get_area() + + IRF->local_result.area * coredynp.num_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + IRF->local_result.area * coredynp.num_pipelines * cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1)); + // area.set_area(area.get_area()*cdb_overhead); + // output_data_csv(IRF.RF.local_result); + + //**********************************FRF*************************************** + data = coredynp.fp_data_width; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 32.0)) * 4; + interface_ip.cache_sz = coredynp.num_FRF_entry * interface_ip.line_sz; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. + interface_ip.num_rd_ports = 2 * XML->sys.core[ithCore].issue_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; + interface_ip.num_se_rd_ports = 0; + FRF = new ArrayST(&interface_ip, + "Floating point Register File", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + FRF->area.set_area(FRF->area.get_area() + + FRF->local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + FRF->local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + // area.set_area(area.get_area()*cdb_overhead); + // output_data_csv(FRF.RF.local_result); + int_regfile_height = IRF->local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); + fp_regfile_height = FRF->local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); + // since a EXU is associated with each pipeline, the cdb should not have + // longer length. + if (coredynp.regWindowing) { + //*********************************REG_WIN************************************ + data = + coredynp + .int_data_width; // ECC, and usually 2 regs are transfered together + // during window shifting.Niagara Mega cell + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = int(ceil(data / 8.0)); + interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size * + IRF->l_ip.cache_sz * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 4.0 / clockRate; + interface_ip.latency = 4.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + 1; // this is the transfer port for saving/restoring states when + // exceptions happen. + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + RFWIN = new ArrayST(&interface_ip, + "RegWindow", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + RFWIN->area.set_area(RFWIN->area.get_area() + + RFWIN->local_result.area * coredynp.num_pipelines); + area.set_area(area.get_area() + + RFWIN->local_result.area * coredynp.num_pipelines); + // output_data_csv(RFWIN.RF.local_result); + } +} + +void RegFU::computeEnergy(bool is_tdp) { + /* + * Architecture RF and physical RF cannot be present at the same time. 
+ * Therefore, the RF stats can only refer to either ARF or PRF; + * And the same stats can be used for both. + */ + if (!exist) + return; + if (is_tdp) { + // init stats for Peak + IRF->stats_t.readAc.access = + coredynp.issueW * 2 * + (coredynp.ALU_duty_cycle * 1.1 + + (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * + coredynp.num_pipelines; + IRF->stats_t.writeAc.access = + coredynp.issueW * + (coredynp.ALU_duty_cycle * 1.1 + + (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * + coredynp.num_pipelines; + // Rule of Thumb: about 10% RF related instructions do not need to access + // ALUs + IRF->tdp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports * + coredynp.FPU_duty_cycle * 1.05 * + coredynp.num_fp_pipelines; + FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports * + coredynp.FPU_duty_cycle * 1.05 * + coredynp.num_fp_pipelines; + FRF->tdp_stats = FRF->stats_t; + if (coredynp.regWindowing) { + RFWIN->stats_t.readAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; + RFWIN->stats_t.writeAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; + RFWIN->tdp_stats = RFWIN->stats_t; + } + } else { + // init stats for Runtime Dynamic (RTP) + IRF->stats_t.readAc.access = + XML->sys.core[ithCore] + .int_regfile_reads; // TODO: no diff on archi and phy + IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; + IRF->rtp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; + FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; + FRF->rtp_stats = FRF->stats_t; + if (coredynp.regWindowing) { + RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls * 16; + RFWIN->stats_t.writeAc.access = + XML->sys.core[ithCore].function_calls * 16; + RFWIN->rtp_stats = RFWIN->stats_t; + + IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + + XML->sys.core[ithCore].function_calls * 16; + IRF->stats_t.writeAc.access = 
XML->sys.core[ithCore].int_regfile_writes + + XML->sys.core[ithCore].function_calls * 16; + IRF->rtp_stats = IRF->stats_t; + + FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + + XML->sys.core[ithCore].function_calls * 16; + ; + FRF->stats_t.writeAc.access = + XML->sys.core[ithCore].float_regfile_writes + + XML->sys.core[ithCore].function_calls * 16; + ; + FRF->rtp_stats = FRF->stats_t; + } + } + IRF->power_t.reset(); + FRF->power_t.reset(); + IRF->power_t.readOp.dynamic += + (IRF->stats_t.readAc.access * IRF->local_result.power.readOp.dynamic + + IRF->stats_t.writeAc.access * IRF->local_result.power.writeOp.dynamic); + FRF->power_t.readOp.dynamic += + (FRF->stats_t.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->stats_t.writeAc.access * FRF->local_result.power.writeOp.dynamic); + if (coredynp.regWindowing) { + RFWIN->power_t.reset(); + RFWIN->power_t.readOp.dynamic += + (RFWIN->stats_t.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->stats_t.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic); + } + + if (is_tdp) { + IRF->power = IRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) + : IRF->local_result.power); + FRF->power = FRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (FRF->local_result.power * coredynp.pppm_lkg_multhread) + : FRF->local_result.power); + power = power + (IRF->power + FRF->power); + if (coredynp.regWindowing) { + RFWIN->power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; + power = power + RFWIN->power; + } + } else { + IRF->rt_power = + IRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) + : IRF->local_result.power); + FRF->rt_power = + FRF->power_t + + ((coredynp.scheu_ty == ReservationStation) + ? 
(FRF->local_result.power * coredynp.pppm_lkg_multhread) + : FRF->local_result.power); + rt_power = rt_power + (IRF->power_t + FRF->power_t); + if (coredynp.regWindowing) { + RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; + rt_power = rt_power + RFWIN->rt_power; + } + } +} + +void RegFU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + cout << indent_str << "Integer RF:" << endl; + cout << indent_str_next << "Area = " << IRF->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << IRF->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? IRF->power.readOp.longer_channel_leakage + : IRF->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? IRF->power.readOp.power_gated_with_long_channel_leakage + : IRF->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + cout << indent_str << "Floating Point RF:" << endl; + cout << indent_str_next << "Area = " << FRF->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << FRF->power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
FRF->power.readOp.longer_channel_leakage + : FRF->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? FRF->power.readOp.power_gated_with_long_channel_leakage + : FRF->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next + << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic / executionTime + << " W" << endl; + cout << endl; + if (coredynp.regWindowing) { + cout << indent_str << "Register Windows:" << endl; + cout << indent_str_next << "Area = " << RFWIN->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << RFWIN->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? RFWIN->power.readOp.longer_channel_leakage + : RFWIN->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
RFWIN->power.readOp.power_gated_with_long_channel_leakage + : RFWIN->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << RFWIN->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else { + cout << indent_str_next << "Integer RF Peak Dynamic = " + << IRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Integer RF Subthreshold Leakage = " + << IRF->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Floating Point RF Peak Dynamic = " + << FRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " + << FRF->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Floating Point RF Gate Leakage = " + << FRF->rt_power.readOp.gate_leakage << " W" << endl; + if (coredynp.regWindowing) { + cout << indent_str_next << "Register Windows Peak Dynamic = " + << RFWIN->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Register Windows Subthreshold Leakage = " + << RFWIN->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Register Windows Gate Leakage = " + << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; + } + } +} + +RegFU ::~RegFU() { + + if (!exist) + return; + if (IRF) { + delete IRF; + IRF = 0; + } + if (FRF) { + delete FRF; + FRF = 0; + } + if (RFWIN) { + delete RFWIN; + RFWIN = 0; + } +} diff --git a/src/core/regfile.h b/src/core/regfile.h new file mode 100644 index 0000000..264874e --- /dev/null +++ b/src/core/regfile.h @@ -0,0 +1,70 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 
Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __REGFILE_U_H__ +#define __REGFILE_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class RegFU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + CoreDynParam coredynp; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + double int_regfile_height; + double fp_regfile_height; + ArrayST *IRF; + ArrayST *FRF; + ArrayST *RFWIN; + bool exist; + + RegFU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~RegFU(); +}; + +#endif // __REGFILE_U_H__ diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc new file mode 100644 index 0000000..f1d6e15 --- /dev/null +++ b/src/core/renaming_unit.cc @@ -0,0 +1,1412 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "renaming_unit.h" + +#include "XML_Parse.h" +#include "basic_circuit.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +#include +#include +#include +#include +#include + +RENAMINGU::RENAMINGU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) + : XML(XML_interface), ithCore(ithCore_), 
interface_ip(*interface_ip_), + coredynp(dyn_p_), iFRAT(0), fFRAT(0), iRRAT(0), fRRAT(0), ifreeL(0), + ffreeL(0), idcl(0), fdcl(0), RAHT(0), exist(exist_) { + /* + * Although renaming logic may be used in in-order processors, +* McPAT assumes no renaming logic is used since the performance gain is very +limited and +* the only major inorder processor with renaming logic is Itanium +* that is a VLIW processor and different from current McPAT's model. + * physical register base OOO must have Dual-RAT architecture or equivalent +structure.FRAT:FrontRAT, RRAT:RetireRAT; + * i,f prefix mean int and fp + * RAT for all Renaming logic, random accessible checkpointing is used, but +only update when instruction retires. + * FRAT will be read twice and written once per instruction; + * RRAT will be written once per instruction when committing and reads out all +when context switch + * + * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, + * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, + * + * RAM-based RAT is duplicated/partitioned for each different hardware threads + * CAM-based RAT is shared for all hardware threads + * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N +(N-way SMT) sets of entries, with each set for a thread. + * The RAT control logic will determine different sets to use for different +threads. But it does not need extra tag bits in the entries. + * However, CAM-based RAT needs extra tag bits to distinguish the architecture +register ids for different threads. + + * + * checkpointing of RAT and RRAT are both for architecture state recovery with +events including mis-speculation; + * Checkpointing is easier to implement in CAM than in RAM based RAT, despite +of the inferior scalability of the CAM-based RATs.
+ * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 +checkpoints (based on MIPS designs) for RAM based RATs, + * thus CAM-based RAT does not need RRAT + * Although no Dual-RAT is needed in RS-based OOO processors, since archi +RegFile contains the committed register values, + * a RRAT or GC (not both) will speed up the mis-speculation recovery. Thus, +when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT. + * + * RAM-based RAT does not need to scan/search all contents during instruction +commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is +used to index the RAT entry to be updated. + * + * Both RAM and CAM have same DCL + * + + * + */ + if (!exist) + return; + int tag, data, out_w; + // interface_ip.wire_is_mat_type = 0; + // interface_ip.wire_os_mat_type = 0; + // interface_ip.wt = Global_30; + clockRate = coredynp.clockRate; + executionTime = coredynp.executionTime; + if (coredynp.core_ty == OOO) { + // integer pipeline + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == + RAMbased) { // FRAT with global checkpointing (GCs) please see paper + // tech report for detailed explanation.
+ data = int(ceil(coredynp.phy_ireg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); // 33; + out_w = int(ceil(coredynp.phy_ireg_width / 8.0)); // bytes + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_IRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // FRAT floating point + data = int(ceil(coredynp.phy_freg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_FRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + 
interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if (coredynp.rm_ty == CAMbased) { + // FRAT + tag = coredynp.arch_ireg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // each checkpoint in the CAM-based RAT design needs + // only 1 bit, see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + out_w = int(ceil(coredynp.arch_ireg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.decodeW; + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); 
+ + // FRAT for FP + tag = coredynp.arch_freg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // each checkpoint in the CAM-based RAT design needs + // only 1 bit, see "a power-aware hybrid ram-cam + // renaming mechanism for fast recovery" + out_w = int(ceil(coredynp.arch_freg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + } + + // RRAT is always RAM based, does not have GCs, and is used only for + // record latest non-speculative mapping RRAT is not needed for CAM-based + // RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is + // not needed for RAM-based RAT with checkpoints McPAT assumes renaming + // unit to have RRAT when there is no checkpoints in FRAT, while MIPS + // R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with + // FRAT is very costly, especially for high issue width and high 
clock + // rate. + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + + // RRAT for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + 
interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); + } + // Freelist of renaming unit always RAM based and needed for RAM-based + // RATs. Although it can be implemented within the CAM-based RAT, Current + // McPAT does not have the free bits in the CAM but use the same external + // free list as a close approximation for CAM RAT. Recycle happens at two + // places: 1)when DCL check there are WAW, the Phy-registers/ROB directly + // recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + // therefore num_wr port = decode-1(-1 means at least one phy reg will be + // used for the current renaming group) + commit width + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // TODO + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = + coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; + // every cycle, (coredynp.decodeW -1) inst may need to send back it dest + // tags, committW insts needs to update freelist 
buffers + interface_ip.num_se_rd_ports = 0; + ifreeL = new ArrayST(&interface_ip, + "Int Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + ifreeL->area.set_area(ifreeL->area.get_area() + + ifreeL->local_result.area); + area.set_area(area.get_area() + ifreeL->area.get_area()); + + // freelist for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ffreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = + coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + ffreeL = new ArrayST(&interface_ip, + "FP Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + ffreeL->area.set_area(ffreeL->area.get_area() + + ffreeL->local_result.area); + area.set_area(area.get_area() + ffreeL->area.get_area()); + + idcl = new dep_resource_conflict_check( + &interface_ip, + coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); + + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + + data = int(ceil(coredynp.phy_ireg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_ireg_width / + 8.0)); // GC does not need to be readout + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + 
interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_IRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->local_result.adjust_area(); + // iFRAT->local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // iFRAT->local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // FP + data = int(ceil(coredynp.phy_freg_width * + (1 + coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * + XML->sys.core[ithCore].archi_Regs_FRF_size * + XML->sys.core[ithCore].number_hardware_threads; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + 
interface_ip.num_rw_ports = 1; // the extra one port is for GCs + interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->local_result.adjust_area(); + // fFRAT->local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // fFRAT->local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if (coredynp.rm_ty == CAMbased) { + // FRAT + tag = coredynp.arch_ireg_width + coredynp.hthread_width; + data = int(ceil( + (coredynp.arch_ireg_width + 1 * coredynp.globalCheckpoint) / 8.0)); + out_w = int(ceil(coredynp.arch_ireg_width / + 8.0)); // GC bits does not need to be sent out + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_IRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = coredynp.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.decodeW; + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + 
iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + + // FRAT + tag = coredynp.arch_freg_width + coredynp.hthread_width; + data = int( + ceil((coredynp.arch_freg_width + 1 * coredynp.globalCheckpoint) / + 8.0)); // the address of CAM needed to be sent out + out_w = int(ceil(coredynp.arch_freg_width / 8.0)); + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * XML->sys.core[ithCore].phy_Regs_FRF_size; + interface_ip.assoc = 0; + interface_ip.nbanks = 1; + interface_ip.out_w = out_w * 8; + interface_ip.specific_tag = 1; + interface_ip.tag_w = tag; + interface_ip.access_mode = 2; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // for GCs + interface_ip.num_rd_ports = + XML->sys.core[ithCore].decode_width; // 0;TODO; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + } + // Although no RRAT for RS based OOO is really needed since the archiRF + // always holds the non-speculative data, having the RRAT or GC (not both) + // can help the recovery of mis-speculations. 
+ + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_IRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + + // RRAT for FP + data = int(ceil(coredynp.phy_freg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = + data * XML->sys.core[ithCore].archi_Regs_FRF_size * 2 * + XML->sys.core[ithCore] + .number_hardware_threads; // HACK--2 to make it as least 64B + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 
1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = coredynp.fp_decodeW; + interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); + } + + // Freelist of renaming unit of RS based OOO is unifed for both int and fp + // renaming unit since the ROB is unified + data = int(ceil(coredynp.phy_ireg_width / 8.0)); + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.line_sz = data; + interface_ip.cache_sz = data * coredynp.num_ifreelist_entries; + interface_ip.assoc = 1; + interface_ip.nbanks = 1; + interface_ip.out_w = interface_ip.line_sz * 8; + interface_ip.access_mode = 1; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; // TODO + interface_ip.num_rd_ports = coredynp.decodeW; + interface_ip.num_wr_ports = + coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; + interface_ip.num_se_rd_ports = 0; + ifreeL = new ArrayST(&interface_ip, + "Unified Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + // ifreeL->area.set_area(ifreeL->area.get_area()+ + // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); + area.set_area(area.get_area() + ifreeL->area.get_area()); + + idcl = new dep_resource_conflict_check( + &interface_ip, + coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); + } + } + if (coredynp.core_ty == Inorder && coredynp.issueW > 1) { + 
/* Dependency check logic will only present when decode(issue) width>1. + * Multiple issue in order processor can do without renaming, but dcl is a + * must. + */ + idcl = new dep_resource_conflict_check( + &interface_ip, + coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl = new dep_resource_conflict_check( + &interface_ip, coredynp, coredynp.phy_freg_width); + } +} + +void RENAMINGU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double pppm_t[4] = {1, 1, 1, 1}; + if (is_tdp) { // init stats for Peak + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; + } + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports;; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; + + ffreeL->stats_t.readAc.access = + 
coredynp.decodeW; // ffreeL->l_ip.num_rd_ports; + ffreeL->stats_t.writeAc.access = + coredynp.decodeW; // ffreeL->l_ip.num_wr_ports; + ffreeL->tdp_stats = ffreeL->stats_t; + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; + } + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; + } + // Unified free list for both int and fp + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; + } + idcl->stats_t.readAc.access = coredynp.decodeW; + fdcl->stats_t.readAc.access = coredynp.decodeW; + idcl->tdp_stats = idcl->stats_t; + fdcl->tdp_stats = fdcl->stats_t; + } else { + if (coredynp.issueW > 1) { + idcl->stats_t.readAc.access = coredynp.decodeW; + fdcl->stats_t.readAc.access = coredynp.decodeW; + idcl->tdp_stats = idcl->stats_t; + fdcl->tdp_stats = fdcl->stats_t; + } + } + + } else { 
// init stats for Runtime Dynamic (RTP) + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .fp_rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + fRRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; + } + ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + ifreeL->stats_t.writeAc.access = + 2 * XML->sys.core[ithCore].rename_writes; + ifreeL->rtp_stats = ifreeL->stats_t; + + ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + ffreeL->stats_t.writeAc.access = + 2 * XML->sys.core[ithCore].fp_rename_writes; + ffreeL->rtp_stats = ffreeL->stats_t; + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == 
RAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + // iFRAT->stats_t.searchAc.access = + // XML->sys.core[ithCore].committed_int_instructions;//hack: not all + // committed instructions use regs. + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + // fFRAT->stats_t.searchAc.access = + // XML->sys.core[ithCore].committed_fp_instructions; + fFRAT->rtp_stats = fFRAT->stats_t; + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; + + fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; + } + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + fRRAT->stats_t.readAc.access = + XML->sys.core[ithCore] + .fp_rename_writes; // Hack, should be (context switch + branch + // mispredictions)*16 + fRRAT->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; + } + // Unified free list for both int and fp since the ROB act as physcial + // registers + ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + + XML->sys.core[ithCore].fp_rename_reads; + ifreeL->stats_t.writeAc.access = + 2 * (XML->sys.core[ithCore].rename_writes + + XML->sys.core[ithCore] + .fp_rename_writes); // HACK: 2-> since some of renaming in 
+ // the same group are terminated early + ifreeL->rtp_stats = ifreeL->stats_t; + } + idcl->stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * + XML->sys.core[ithCore].rename_reads; + fdcl->stats_t.readAc.access = 3 * coredynp.fp_issueW * + coredynp.fp_issueW * + XML->sys.core[ithCore].fp_rename_writes; + idcl->rtp_stats = idcl->stats_t; + fdcl->rtp_stats = fdcl->stats_t; + } else { + if (coredynp.issueW > 1) { + idcl->stats_t.readAc.access = + 2 * XML->sys.core[ithCore].int_instructions; + fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; + idcl->rtp_stats = idcl->stats_t; + fdcl->rtp_stats = fdcl->stats_t; + } + } + } + /* Compute engine */ + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + 
iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); + } + + ifreeL->power_t.reset(); + ffreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); + ffreeL->power_t.readOp.dynamic += + (ffreeL->stats_t.readAc.access * + ffreeL->local_result.power.readOp.dynamic + + ffreeL->stats_t.writeAc.access * + ffreeL->local_result.power.writeOp.dynamic); + + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); + } else if (coredynp.rm_ty == CAMbased) { + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); 
+ } + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); + } + + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); + } + + } else { + if (coredynp.issueW > 1) { + idcl->power_t.reset(); + fdcl->power_t.reset(); + set_pppm(pppm_t, + idcl->stats_t.readAc.access, + coredynp.num_hthreads, + coredynp.num_hthreads, + idcl->stats_t.readAc.access); + idcl->power_t = idcl->power * pppm_t; + set_pppm(pppm_t, + fdcl->stats_t.readAc.access, + coredynp.num_hthreads, + coredynp.num_hthreads, + idcl->stats_t.readAc.access); + fdcl->power_t = fdcl->power * pppm_t; + } + } + + // assign value to tpd and rtp + if (is_tdp) { + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->power = ffreeL->power_t + ffreeL->local_result.power; + power = power + + (iFRAT->power + fFRAT->power) + //+ (iRRAT->power + fRRAT->power) + + (ifreeL->power + ffreeL->power); + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); + } + } 
else if (coredynp.scheu_ty == ReservationStation) { + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + power = power + (iFRAT->power + fFRAT->power) + ifreeL->power; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); + } + } + } else { + power = power + idcl->power_t + fdcl->power_t; + } + + } else { + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power; + rt_power = rt_power + + (iFRAT->rt_power + fFRAT->rt_power) + // + (iRRAT->rt_power + + // fRRAT->rt_power) + + (ifreeL->rt_power + ffreeL->rt_power); + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + } + } else if (coredynp.scheu_ty == ReservationStation) { + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + rt_power = + rt_power + (iFRAT->rt_power + fFRAT->rt_power) + ifreeL->rt_power; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + 
fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + } + } + } else { + rt_power = rt_power + idcl->power_t + fdcl->power_t; + } + } +} + +void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) + return; + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + bool long_channel = XML->sys.longer_channel_device; + bool power_gating = XML->sys.power_gating; + + if (is_tdp) { + + if (coredynp.core_ty == OOO) { + cout << indent_str << "Int Front End RAT with " + << coredynp.globalCheckpoint << " internal checkpoints:" << endl; + cout << indent_str_next << "Area = " << iFRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << iFRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? iFRAT->power.readOp.longer_channel_leakage + : iFRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? iFRAT->power.readOp.power_gated_with_long_channel_leakage + : iFRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << iFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "FP Front End RAT with " + << coredynp.globalCheckpoint << " internal checkpoints:" << endl; + cout << indent_str_next << "Area = " << fFRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fFRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
fFRAT->power.readOp.longer_channel_leakage + : fFRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? fFRAT->power.readOp.power_gated_with_long_channel_leakage + : fFRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "Free List:" << endl; + cout << indent_str_next << "Area = " << ifreeL->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ifreeL->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ifreeL->power.readOp.longer_channel_leakage + : ifreeL->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? ifreeL->power.readOp.power_gated_with_long_channel_leakage + : ifreeL->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ifreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + cout << indent_str << "Int Retire RAT: " << endl; + cout << indent_str_next << "Area = " << iRRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << iRRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
iRRAT->power.readOp.longer_channel_leakage + : iRRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? iRRAT->power.readOp + .power_gated_with_long_channel_leakage + : iRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << iRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + cout << indent_str << "FP Retire RAT:" << endl; + cout << indent_str_next << "Area = " << fRRAT->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fRRAT->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? fRRAT->power.readOp.longer_channel_leakage + : fRRAT->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? fRRAT->power.readOp + .power_gated_with_long_channel_leakage + : fRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + if (coredynp.scheu_ty == PhysicalRegFile) { + cout << indent_str << "FP Free List:" << endl; + cout << indent_str_next << "Area = " << ffreeL->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ffreeL->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
ffreeL->power.readOp.longer_channel_leakage + : ffreeL->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? ffreeL->power.readOp + .power_gated_with_long_channel_leakage + : ffreeL->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ffreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else { + cout << indent_str << "Int DCL:" << endl; + cout << indent_str_next + << "Peak Dynamic = " << idcl->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? idcl->power.readOp.longer_channel_leakage + : idcl->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? idcl->power.readOp.power_gated_with_long_channel_leakage + : idcl->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << idcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << indent_str << "FP DCL:" << endl; + cout << indent_str_next + << "Peak Dynamic = " << fdcl->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? fdcl->power.readOp.longer_channel_leakage + : fdcl->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
fdcl->power.readOp.power_gated_with_long_channel_leakage + : fdcl->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fdcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + } + } else { + if (coredynp.core_ty == OOO) { + cout << indent_str_next << "Int Front End RAT Peak Dynamic = " + << iFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " + << iFRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Int Front End RAT Gate Leakage = " + << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "FP Front End RAT Peak Dynamic = " + << fFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " + << fFRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Front End RAT Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Free List Peak Dynamic = " + << ifreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Free List Subthreshold Leakage = " + << ifreeL->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Free List Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + if (coredynp.scheu_ty == PhysicalRegFile) { + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + cout << indent_str_next << "Int Retire RAT Peak Dynamic = " + << iRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " + << iRRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Int Retire RAT Gate Leakage = " + << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; + cout << 
indent_str_next << "FP Retire RAT Peak Dynamic = " + << fRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " + << fRRAT->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Retire RAT Gate Leakage = " + << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; + } + cout << indent_str_next << "FP Free List Peak Dynamic = " + << ffreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP Free List Subthreshold Leakage = " + << ffreeL->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Free List Gate Leakage = " + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + } + } else { + cout << indent_str_next << "Int DCL Peak Dynamic = " + << idcl->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Int DCL Subthreshold Leakage = " + << idcl->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "FP DCL Peak Dynamic = " + << fdcl->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "FP DCL Subthreshold Leakage = " + << fdcl->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage + << " W" << endl; + } + } +} + +RENAMINGU ::~RENAMINGU() { + + if (!exist) + return; + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (iRRAT) { + delete iRRAT; + iRRAT = 0; + } + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (ifreeL) { + delete ifreeL; + ifreeL = 0; + } + if (idcl) { + delete idcl; + idcl = 0; + } + if (fFRAT) { + delete fFRAT; + fFRAT = 0; + } + if (fRRAT) { + delete fRRAT; + fRRAT = 0; + } + if (fdcl) { + delete fdcl; + fdcl = 0; + } + if (ffreeL) { + delete ffreeL; + ffreeL = 0; + } + if (RAHT) { + delete RAHT; + RAHT = 0; + } +} diff --git 
a/src/core/renaming_unit.h b/src/core/renaming_unit.h new file mode 100644 index 0000000..0b2bbba --- /dev/null +++ b/src/core/renaming_unit.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __RENAMING_U_H__ +#define __RENAMING_U_H__ + +#include "XML_Parse.h" +#include "array.h" +#include "basic_components.h" +#include "interconnect.h" +#include "logic.h" +#include "parameter.h" + +class RENAMINGU : public Component { +public: + ParseXML *XML; + int ithCore; + InputParameter interface_ip; + double clockRate; + double executionTime; + CoreDynParam coredynp; + ArrayST *iFRAT; + ArrayST *fFRAT; + ArrayST *iRRAT; + ArrayST *fRRAT; + ArrayST *ifreeL; + ArrayST *ffreeL; + dep_resource_conflict_check *idcl; + dep_resource_conflict_check *fdcl; + ArrayST *RAHT; // register alias history table Used to store GC + bool exist; + + RENAMINGU(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeEnergy(bool is_tdp = true); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~RENAMINGU(); +}; + +#endif // __RENAMING_U_H__ diff --git a/src/core/scheduler.cc b/src/core/scheduler.cc new file mode 100644 index 0000000..9525c5e --- /dev/null +++ b/src/core/scheduler.cc @@ -0,0 +1,795 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
 * All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
 *
 ***************************************************************************/

#include "scheduler.h"

#include "XML_Parse.h"
#include "basic_circuit.h"
#include "const.h"
#include "io.h"
#include "parameter.h"

// NOTE(review): the five #include directives below carry no header name in
// this copy of the patch; restore them from the original source before
// building.
#include
#include
#include
#include
#include

/**
 * Builds the scheduler structures for one core.
 *
 * Nothing is built when exist_ is false. Otherwise:
 *  - in-order multithreaded core: one "InstFetchQueue" array plus selection
 *    logic sized for peak_issueW * number_hardware_threads;
 *  - OOO core: an integer window, an FP window, a "ReorderBuffer" array when
 *    ROB_size > 0, and selection logic sized for peak_issueW.
 *
 * Every array is described by filling in the member copy of interface_ip
 * and passing its address to the ArrayST constructor. Fields that a later
 * section does not reassign keep the value left by the previous section, so
 * the order of the assignments below matters.
 *
 * After each array is created, local_result.area times the pipeline count
 * is added both to that array's own area and to this component's area.
 */
SchedulerU::SchedulerU(ParseXML *XML_interface,
                       int ithCore_,
                       InputParameter *interface_ip_,
                       const CoreDynParam &dyn_p_,
                       bool exist_)
    : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_),
      coredynp(dyn_p_), int_inst_window(0), fp_inst_window(0), ROB(0),
      instruction_selection(0), exist(exist_) {
  if (!exist)
    return;
  int tag, data;
  bool is_default = true;
  string tmp_name;

  clockRate = coredynp.clockRate;
  executionTime = coredynp.executionTime;
  if ((coredynp.core_ty == Inorder && coredynp.multithreaded)) {
    // Instruction issue queue, in-order multi-issue or multithreaded processor
    // also has this structure. Unified window for Inorder processors
    tag = int(log2(XML->sys.core[ithCore].number_hardware_threads) *
              coredynp.perThreadState); // This is the normal thread state bits
                                        // based on Niagara Design
    data = XML->sys.core[ithCore].instruction_length;
    // NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and
    // IA-32 Architectures Software Developer’s Manual
    interface_ip.is_cache = true;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = false;
    interface_ip.line_sz = int(ceil(data / 8.0)); // data / 8.0, rounded up
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    // Window capacity, never smaller than 64.
    interface_ip.cache_sz =
        XML->sys.core[ithCore].instruction_window_size * interface_ip.line_sz >
                64
            ? XML->sys.core[ithCore].instruction_window_size *
                  interface_ip.line_sz
            : 64;
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz * 8;
    interface_ip.access_mode = 1;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.peak_issueW;
    interface_ip.num_wr_ports = coredynp.peak_issueW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = coredynp.peak_issueW;
    int_inst_window = new ArrayST(&interface_ip,
                                  "InstFetchQueue",
                                  Core_device,
                                  coredynp.opt_local,
                                  coredynp.core_ty);
    int_inst_window->area.set_area(int_inst_window->area.get_area() +
                                   int_inst_window->local_result.area *
                                       coredynp.num_pipelines);
    area.set_area(area.get_area() +
                  int_inst_window->local_result.area * coredynp.num_pipelines);
    // output_data_csv(iRS.RS.local_result);
    Iw_height = int_inst_window->local_result.cache_ht;

    /*
     * selection logic
     * In a single-issue Inorder multithreaded processor like Niagara, issue
     * width=1*number_of_threads since the processor does need to pick up
     * instructions from multiple ready ones(although these ready ones are from
     * different threads).While SMT processors do not distinguish which thread
     * belongs to who at the issue stage.
     */
    interface_ip.assoc =
        1; // reset to prevent unnecessary warning messages when init_interface
    instruction_selection = new selection_logic(
        is_default,
        XML->sys.core[ithCore].instruction_window_size,
        coredynp.peak_issueW * XML->sys.core[ithCore].number_hardware_threads,
        &interface_ip,
        Core_device,
        coredynp.core_ty);
  }

  if (coredynp.core_ty == OOO) {
    /*
     * CAM based instruction window
     * For physicalRegFilebased OOO it is the instruction issue queue, where
     * only tags of phy regs are stored For RS based OOO it is the Reservation
     * station, where both tags and values of phy regs are stored It is written
     * once and read twice(two operands) before an instruction can be issued.
     * X86 instruction can be very long up to 15B. add instruction length in XML
     */
    if (coredynp.scheu_ty == PhysicalRegFile) {
      tag = coredynp.phy_ireg_width;
      // Each time only half of the tag is compared, but two tag should be
      // stored. This underestimate the search power
      // NOTE(review): here int() truncates the result of the division by
      // 8.0 (ceil is applied to the half-width only), whereas the else
      // branch applies ceil after the division by 8.0 - confirm which
      // rounding is intended.
      data =
          int((ceil((coredynp.instruction_length +
                     2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width)) /
                    2.0) /
               8.0));
      // Data width being divided by 2 means only after both operands available
      // the whole data will be read out. This is modeled using two equivalent
      // readouts with half of the data width
      tmp_name = "InstIssueQueue";
    } else {
      tag = coredynp.phy_ireg_width;
      // Each time only half of the tag is compared, but two tag should be
      // stored. This underestimate the search power
      data =
          int(ceil(((coredynp.instruction_length +
                     2 * (coredynp.phy_ireg_width - coredynp.arch_ireg_width) +
                     2 * coredynp.int_data_width) /
                    2.0) /
                   8.0));
      // Data width being divided by 2 means only after both operands available
      // the whole data will be read out. This is modeled using two equivalent
      // readouts with half of the data width

      tmp_name = "IntReservationStation";
    }
    interface_ip.is_cache = true;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = false;
    interface_ip.line_sz = data;
    interface_ip.cache_sz =
        data * XML->sys.core[ithCore].instruction_window_size;
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz * 8;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.access_mode = 0;
    // Twice the clock period for the integer window (the FP window below
    // uses one period).
    interface_ip.throughput = 2 * 1.0 / clockRate;
    interface_ip.latency = 2 * 1.0 / clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.peak_issueW;
    interface_ip.num_wr_ports = coredynp.peak_issueW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = coredynp.peak_issueW;
    int_inst_window = new ArrayST(&interface_ip,
                                  tmp_name,
                                  Core_device,
                                  coredynp.opt_local,
                                  coredynp.core_ty);
    int_inst_window->area.set_area(int_inst_window->area.get_area() +
                                   int_inst_window->local_result.area *
                                       coredynp.num_pipelines);
    area.set_area(area.get_area() +
                  int_inst_window->local_result.area * coredynp.num_pipelines);
    Iw_height = int_inst_window->local_result.cache_ht;
    // FU inst window
    if (coredynp.scheu_ty == PhysicalRegFile) {
      tag = 2 * coredynp.phy_freg_width; // TODO: each time only half of the tag
                                         // is compared
      data =
          int(ceil((coredynp.instruction_length +
                    2 * (coredynp.phy_freg_width - coredynp.arch_freg_width)) /
                   8.0));
      tmp_name = "FPIssueQueue";
    } else {
      // NOTE(review): this FP branch sizes the tag from phy_ireg_width while
      // every other FP quantity here uses the phy_freg/arch_freg widths -
      // looks like a copy-paste slip; confirm before changing, since it
      // affects model output.
      tag = 2 * coredynp.phy_ireg_width;
      data =
          int(ceil((coredynp.instruction_length +
                    2 * (coredynp.phy_freg_width - coredynp.arch_freg_width) +
                    2 * coredynp.fp_data_width) /
                   8.0));
      tmp_name = "FPReservationStation";
    }
    interface_ip.is_cache = true;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = false;
    interface_ip.line_sz = data;
    interface_ip.cache_sz =
        data * XML->sys.core[ithCore].fp_instruction_window_size;
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz * 8;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.access_mode = 0;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.fp_issueW;
    interface_ip.num_wr_ports = coredynp.fp_issueW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = coredynp.fp_issueW;
    fp_inst_window = new ArrayST(&interface_ip,
                                 tmp_name,
                                 Core_device,
                                 coredynp.opt_local,
                                 coredynp.core_ty);
    fp_inst_window->area.set_area(fp_inst_window->area.get_area() +
                                  fp_inst_window->local_result.area *
                                      coredynp.num_fp_pipelines);
    area.set_area(area.get_area() + fp_inst_window->local_result.area *
                                        coredynp.num_fp_pipelines);
    fp_Iw_height = fp_inst_window->local_result.cache_ht;

    if (XML->sys.core[ithCore].ROB_size > 0) {
      /*
       * if ROB_size = 0, then the target processor does not support
       * hardware-based speculation, i.e. , the processor allow OOO issue as
       * well as OOO completion, which means branch must be resolved before
       * instruction issued into instruction window, since there is no change
       * to flush miss-predict branch path after instructions are issued in
       * this situation.
       *
       * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
       * One old approach is to combine the RAT and ROB as a huge CAM structure
       * as in AMD K7. However, this approach is abandoned due to its high
       * power and poor scalability. McPAT uses current implementation of ROB
       * as circular buffer. ROB is written once when instruction is issued and
       * read once when the instruction is committed.
       */

      int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
      data = int(ceil(
          (robExtra + coredynp.pc_width +
           ((coredynp.rm_ty == RAMbased)
                ? (coredynp.phy_ireg_width + coredynp.phy_freg_width)
                : fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width)) +
           ((coredynp.scheu_ty == PhysicalRegFile) ? 0
                                                    : coredynp.fp_data_width)) /
          8.0));
      /*
       * 5 bits are: busy, Issued, Finished, speculative, valid;
       * PC is to id the instruction for recover
       * exception/mis-prediction. When using RAM-based RAT, ROB needs to
       * contain the ARF-PRF mapping to index the correct entry in the RAT, so
       * that the correct architecture register (and freelist) can be found and
       * the RAT can be appropriately updated; otherwise, the RAM-based RAT
       * needs to support search ops to identify the target architecture
       * register that needs to be updated, or the physical resigner that needs
       * to be recycled; When using CAM-based RAT, ROB only needs to contain
       * destination physical register since the CAM-base RAT can search for the
       * corresponding ARF-PRF mapping to find the correct entry in the RAT, so
       * that the correct architecture register (and freelist/bits) can be found
       * and the RAT can be appropriately updated. ROB phy_reg entry should use
       * the larger one from phy_ireg and phy_freg; fdata_width is always
       * larger. Latest Intel Processors may have different ROB/RS designs.
       */

      /*
      Earlier formulation of the ROB entry width, kept for reference only
      (superseded by the single expression above):

      if(coredynp.scheu_ty==PhysicalRegFile)
      {
        //PC is to id the instruction for recover exception.
        //inst is used to map the renamed dest. registers.so that commit
        //stage can know which reg/RRAT to update
        // data = int(ceil((robExtra+coredynp.pc_width +
        //     coredynp.instruction_length +
        //     2*coredynp.phy_ireg_width)/8.0));

        if (coredynp.rm_ty ==RAMbased)
        {
          data = int(ceil((robExtra + coredynp.pc_width +
              (coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0));
          //When using RAM-based RAT, ROB needs to contain the ARF-PRF
          //mapping to index the correct entry in the RAT,
          //so that the correct architecture register (and freelist) can be
          //found and the RAT can be appropriately updated.
        }
        else if ((coredynp.rm_ty ==CAMbased))
        {
          data = int(ceil((robExtra+coredynp.pc_width +
              fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0));
          //When using CAM-based RAT, ROB needs to contain the ARF-PRF
          //mapping to index the correct entry in the RAT,
          //so that the correct architecture register (and freelist) can be
          //found and the RAT can be appropriately updated.
        }
      }
      else
      {
        //in RS based OOO, ROB also contains value of destination reg
        // data = int(ceil((robExtra+coredynp.pc_width +
        //     coredynp.instruction_length + 2*coredynp.phy_ireg_width +
        //     coredynp.fp_data_width)/8.0));

        //using phy_reg number to search in the RAT, the correct
        //architecture register can be found and the RAT can be
        //appropriately updated.
        //ROB phy_reg entry should use the larger one from ireg and freg;
        //fdata_width is always larger; Latest Intel Processors may have
        //different ROB/RS designs.
        data = int(ceil((robExtra + coredynp.pc_width +
            fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width) +
            coredynp.fp_data_width)/8.0));
      }
      */

      // specific_tag and tag_w are not reassigned for the ROB; they keep the
      // values set for the FP window above.
      interface_ip.is_cache = false;
      interface_ip.pure_cam = false;
      interface_ip.pure_ram = true;
      interface_ip.line_sz = data;
      interface_ip.cache_sz =
          data * XML->sys.core[ithCore]
                     .ROB_size; // The XML ROB size is for all threads
      interface_ip.assoc = 1;
      interface_ip.nbanks = 1;
      interface_ip.out_w = interface_ip.line_sz * 8;
      interface_ip.access_mode = 1;
      interface_ip.throughput = 1.0 / clockRate;
      interface_ip.latency = 1.0 / clockRate;
      interface_ip.obj_func_dyn_energy = 0;
      interface_ip.obj_func_dyn_power = 0;
      interface_ip.obj_func_leak_power = 0;
      interface_ip.obj_func_cycle_t = 1;
      interface_ip.num_rw_ports = 0;
      interface_ip.num_rd_ports = coredynp.peak_commitW;
      interface_ip.num_wr_ports = coredynp.peak_issueW;
      interface_ip.num_se_rd_ports = 0;
      interface_ip.num_search_ports = 0;
      ROB = new ArrayST(&interface_ip,
                        "ReorderBuffer",
                        Core_device,
                        coredynp.opt_local,
                        coredynp.core_ty);
      ROB->area.set_area(ROB->area.get_area() +
                         ROB->local_result.area * coredynp.num_pipelines);
      area.set_area(area.get_area() +
                    ROB->local_result.area * coredynp.num_pipelines);
      ROB_height = ROB->local_result.cache_ht;
    }

    // Receives interface_ip as left by the last array configured above (the
    // ROB when ROB_size > 0, otherwise the FP window).
    instruction_selection =
        new selection_logic(is_default,
                            XML->sys.core[ithCore].instruction_window_size,
                            coredynp.peak_issueW,
                            &interface_ip,
                            Core_device,
                            coredynp.core_ty);
  }
}

void SchedulerU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) {
  if (!exist)
    return;
  string indent_str(indent, ' ');
  string indent_str_next(indent + 2, ' ');
  bool long_channel = XML->sys.longer_channel_device;
  bool power_gating = XML->sys.power_gating;

  if (is_tdp) {
    if (coredynp.core_ty == OOO) {
      cout << indent_str << "Instruction Window:" << endl;
      cout << indent_str_next
           << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2"
           <<
endl; + cout << indent_str_next << "Peak Dynamic = " + << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? int_inst_window->power.readOp.longer_channel_leakage + : int_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? int_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : int_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + cout << indent_str << "FP Instruction Window:" << endl; + cout << indent_str_next + << "Area = " << fp_inst_window->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << fp_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? fp_inst_window->power.readOp.longer_channel_leakage + : fp_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
fp_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : fp_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << fp_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + if (XML->sys.core[ithCore].ROB_size > 0) { + cout << indent_str << "ROB:" << endl; + cout << indent_str_next << "Area = " << ROB->area.get_area() * 1e-6 + << " mm^2" << endl; + cout << indent_str_next + << "Peak Dynamic = " << ROB->power.readOp.dynamic * clockRate + << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? ROB->power.readOp.longer_channel_leakage + : ROB->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? ROB->power.readOp.power_gated_with_long_channel_leakage + : ROB->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic = " + << ROB->rt_power.readOp.dynamic / executionTime << " W" << endl; + cout << endl; + } + } else if (coredynp.multithreaded) { + cout << indent_str << "Instruction Window:" << endl; + cout << indent_str_next + << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic = " + << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? int_inst_window->power.readOp.longer_channel_leakage + : int_inst_window->power.readOp.leakage) + << " W" << endl; + if (power_gating) + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
int_inst_window->power.readOp + .power_gated_with_long_channel_leakage + : int_inst_window->power.readOp.power_gated_leakage) + << " W" << endl; + cout << indent_str_next + << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << " W" << endl; + cout << indent_str_next << "Runtime Dynamic = " + << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << endl; + cout << endl; + } + } else { + if (coredynp.core_ty == OOO) { + cout << indent_str_next << "Instruction Window Peak Dynamic = " + << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Instruction Window Subthreshold Leakage = " + << int_inst_window->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "Instruction Window Gate Leakage = " + << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "FP Instruction Window Peak Dynamic = " + << fp_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next + << "FP Instruction Window Subthreshold Leakage = " + << fp_inst_window->rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next << "FP Instruction Window Gate Leakage = " + << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + if (XML->sys.core[ithCore].ROB_size > 0) { + cout << indent_str_next << "ROB Peak Dynamic = " + << ROB->rt_power.readOp.dynamic * clockRate << " W" << endl; + cout << indent_str_next + << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage + << " W" << endl; + } + } else if (coredynp.multithreaded) { + cout << indent_str_next << "Instruction Window Peak Dynamic = " + << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << endl; + cout << indent_str_next << "Instruction Window Subthreshold Leakage = " + << int_inst_window->rt_power.readOp.leakage << " W" << endl; + cout 
<< indent_str_next << "Instruction Window Gate Leakage = " + << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + } + } +} +void SchedulerU::computeEnergy(bool is_tdp) { + if (!exist) + return; + double ROB_duty_cycle; + // ROB_duty_cycle = ((coredynp.ALU_duty_cycle + + // coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 + // + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 + //? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 + // + + // coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; + ROB_duty_cycle = 1; + // init stats + if (is_tdp) { + if (coredynp.core_ty == OOO) { + int_inst_window->stats_t.readAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; + int_inst_window->stats_t.writeAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; + int_inst_window->stats_t.searchAc.access = + coredynp.issueW * coredynp.num_pipelines; + int_inst_window->tdp_stats = int_inst_window->stats_t; + fp_inst_window->stats_t.readAc.access = + fp_inst_window->l_ip.num_rd_ports * coredynp.num_fp_pipelines; + fp_inst_window->stats_t.writeAc.access = + fp_inst_window->l_ip.num_wr_ports * coredynp.num_fp_pipelines; + fp_inst_window->stats_t.searchAc.access = + fp_inst_window->l_ip.num_search_ports * coredynp.num_fp_pipelines; + fp_inst_window->tdp_stats = fp_inst_window->stats_t; + + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->stats_t.readAc.access = + coredynp.commitW * coredynp.num_pipelines * ROB_duty_cycle; + ROB->stats_t.writeAc.access = + coredynp.issueW * coredynp.num_pipelines * ROB_duty_cycle; + ROB->tdp_stats = ROB->stats_t; + + /* + * When inst commits, ROB must be read. + * Because for Physcial register based cores, physical register tag in + * ROB need to be read out and write into RRAT/CAM based RAT. For RS + * based cores, register content that stored in ROB must be read out and + * stored in architectural registers. 
+ * + * if no-register is involved, the ROB read out operation when + * instruction commits can be ignored. assuming 20% insts. belong this + * type. + * TODO: ROB duty_cycle need to be revisited + */ + } + + } else if (coredynp.multithreaded) { + int_inst_window->stats_t.readAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; + int_inst_window->stats_t.writeAc.access = + coredynp.issueW * + coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; + int_inst_window->stats_t.searchAc.access = + coredynp.issueW * coredynp.num_pipelines; + int_inst_window->tdp_stats = int_inst_window->stats_t; + } + + } else { // rtp + if (coredynp.core_ty == OOO) { + int_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].inst_window_reads; + int_inst_window->stats_t.writeAc.access = + XML->sys.core[ithCore].inst_window_writes; + int_inst_window->stats_t.searchAc.access = + XML->sys.core[ithCore].inst_window_wakeup_accesses; + int_inst_window->rtp_stats = int_inst_window->stats_t; + fp_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].fp_inst_window_reads; + fp_inst_window->stats_t.writeAc.access = + XML->sys.core[ithCore].fp_inst_window_writes; + fp_inst_window->stats_t.searchAc.access = + XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; + fp_inst_window->rtp_stats = fp_inst_window->stats_t; + + if (XML->sys.core[ithCore].ROB_size > 0) { + + ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; + ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; + /* ROB need to be updated in RS based OOO when new values are produced, + * this update may happen before the commit stage when ROB entry is + * released + * 1. ROB write at instruction inserted in + * 2. ROB write as results produced (for RS based OOO only) + * 3. ROB read as instruction committed. 
For RS based OOO, data values + * are read out and sent to ARF For Physical reg based OOO, no data + * stored in ROB, but register tags need to be read out and used to set + * the RRAT and to recycle the register tag to free list buffer + */ + ROB->rtp_stats = ROB->stats_t; + } + + } else if (coredynp.multithreaded) { + int_inst_window->stats_t.readAc.access = + XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions; + int_inst_window->stats_t.writeAc.access = + XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions; + int_inst_window->stats_t.searchAc.access = + 2 * (XML->sys.core[ithCore].int_instructions + + XML->sys.core[ithCore].fp_instructions); + int_inst_window->rtp_stats = int_inst_window->stats_t; + } + } + + // computation engine + if (coredynp.core_ty == OOO) { + int_inst_window->power_t.reset(); + fp_inst_window->power_t.reset(); + + /* each instruction needs to write to scheduler, read out when all resources + * and source operands are ready two search ops with one for each source + * operand + * + */ + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->stats_t.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->stats_t.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->stats_t.writeAc.access + + int_inst_window->stats_t.readAc.access * + instruction_selection->power.readOp.dynamic; + + fp_inst_window->power_t.readOp.dynamic += + fp_inst_window->local_result.power.readOp.dynamic * + fp_inst_window->stats_t.readAc.access + + fp_inst_window->local_result.power.searchOp.dynamic * + fp_inst_window->stats_t.searchAc.access + + fp_inst_window->local_result.power.writeOp.dynamic * + fp_inst_window->stats_t.writeAc.access + + fp_inst_window->stats_t.writeAc.access * + instruction_selection->power.readOp.dynamic; + + if (XML->sys.core[ithCore].ROB_size > 0) { 
+ ROB->power_t.reset(); + ROB->power_t.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * ROB->stats_t.readAc.access + + ROB->stats_t.writeAc.access * ROB->local_result.power.writeOp.dynamic; + } + + } else if (coredynp.multithreaded) { + int_inst_window->power_t.reset(); + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->stats_t.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->stats_t.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->stats_t.writeAc.access + + int_inst_window->stats_t.writeAc.access * + instruction_selection->power.readOp.dynamic; + } + + // assign values + if (is_tdp) { + if (coredynp.core_ty == OOO) { + int_inst_window->power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + fp_inst_window->power = + fp_inst_window->power_t + + (fp_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + power = power + int_inst_window->power + fp_inst_window->power; + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->power = ROB->power_t + ROB->local_result.power * pppm_lkg; + power = power + ROB->power; + } + + } else if (coredynp.multithreaded) { + // set_pppm(pppm_t, + // XML->sys.core[ithCore].issue_width,1, 1, 1); + int_inst_window->power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + power = power + int_inst_window->power; + } + + } else { // rtp + if (coredynp.core_ty == OOO) { + int_inst_window->rt_power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + fp_inst_window->rt_power = + fp_inst_window->power_t + + (fp_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + rt_power = + rt_power + int_inst_window->rt_power + 
fp_inst_window->rt_power; + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB->rt_power = ROB->power_t + ROB->local_result.power * pppm_lkg; + rt_power = rt_power + ROB->rt_power; + } + + } else if (coredynp.multithreaded) { + // set_pppm(pppm_t, + // XML->sys.core[ithCore].issue_width,1, 1, 1); + int_inst_window->rt_power = + int_inst_window->power_t + + (int_inst_window->local_result.power + instruction_selection->power) * + pppm_lkg; + rt_power = rt_power + int_inst_window->rt_power; + } + } + // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); + // cout<<"Scheduler + // power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage< Date: Thu, 11 Jun 2020 13:30:46 -0500 Subject: [PATCH 19/59] refactor: Changed cache interface, need to update the DataCache and InstCache to fit new interface --- src/basic_components.h | 18 - src/cache/CMakeLists.txt | 2 + src/cache/cache_param.cc | 130 +++++++ src/cache/cache_param.h | 77 +++++ src/cache/sharedcache.cc | 729 ++++++++++----------------------------- src/cache/sharedcache.h | 65 ++-- src/core/core.cc | 12 +- src/processor.cc | 44 ++- 8 files changed, 453 insertions(+), 624 deletions(-) create mode 100644 src/cache/cache_param.cc create mode 100644 src/cache/cache_param.h diff --git a/src/basic_components.h b/src/basic_components.h index b8f6ae5..64e3381 100644 --- a/src/basic_components.h +++ b/src/basic_components.h @@ -45,8 +45,6 @@ enum Renaming_type { RAMbased, CAMbased }; enum Scheduler_type { PhysicalRegFile, ReservationStation }; -enum cache_level { L2, L3, L1Directory, L2Directory }; - enum MemoryCtrl_type { MC, // memory controller FLASHC // flash controller @@ -163,22 +161,6 @@ class CoreDynParam { ~CoreDynParam(){}; }; -class CacheDynParam { -public: - CacheDynParam(){}; - CacheDynParam(ParseXML *XML_interface, int ithCache_); - string name; - enum Dir_type dir_ty; - double clockRate, executionTime; - double capacity, blockW, assoc, nbanks; - double throughput, latency; - double duty_cycle, 
dir_duty_cycle; - // double duty_cycle; - int missb_size, fu_size, prefetchb_size, wbb_size; - double vdd; - double power_gating_vcc; - ~CacheDynParam(){}; -}; class MCParam { public: diff --git a/src/cache/CMakeLists.txt b/src/cache/CMakeLists.txt index 8b954aa..df91a8a 100644 --- a/src/cache/CMakeLists.txt +++ b/src/cache/CMakeLists.txt @@ -1,4 +1,6 @@ add_library(cache + cache_param.h + cache_param.cc datacache.h datacache.cc instcache.h diff --git a/src/cache/cache_param.cc b/src/cache/cache_param.cc new file mode 100644 index 0000000..a0e1ad0 --- /dev/null +++ b/src/cache/cache_param.cc @@ -0,0 +1,130 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "cache_param.h" + +#include + +void CacheDynParam::set_params_l2_cache(const ParseXML* XML, const int ithCache) { + this->name = "L2"; + this->clockRate = XML->sys.L2[ithCache].clockrate; + this->clockRate *= 1e6; + this->executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + this->capacity = XML->sys.L2[ithCache].L2_config[0]; + this->blockW = XML->sys.L2[ithCache].L2_config[1]; + this->assoc = XML->sys.L2[ithCache].L2_config[2]; + this->nbanks = XML->sys.L2[ithCache].L2_config[3]; + this->throughput = XML->sys.L2[ithCache].L2_config[4] / this->clockRate; + this->latency = XML->sys.L2[ithCache].L2_config[5] / this->clockRate; + this->missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; + this->fu_size = XML->sys.L2[ithCache].buffer_sizes[1]; + this->prefetchb_size = XML->sys.L2[ithCache].buffer_sizes[2]; + this->wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; + this->duty_cycle = XML->sys.L2[ithCache].duty_cycle; + if (!XML->sys.L2[ithCache].merged_dir) { + this->dir_ty = NonDir; + } else { + this->dir_ty = SBT; + this->dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; + } +} + +void CacheDynParam::set_params_l3_cache(const ParseXML* XML, const int ithCache) { + this->name = "L3"; + this->clockRate = XML->sys.L3[ithCache].clockrate; + this->clockRate *= 1e6; + this->executionTime = + 
XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + this->capacity = XML->sys.L3[ithCache].L3_config[0]; + this->blockW = XML->sys.L3[ithCache].L3_config[1]; + this->assoc = XML->sys.L3[ithCache].L3_config[2]; + this->nbanks = XML->sys.L3[ithCache].L3_config[3]; + this->throughput = XML->sys.L3[ithCache].L3_config[4] / this->clockRate; + this->latency = XML->sys.L3[ithCache].L3_config[5] / this->clockRate; + this->missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; + this->fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; + this->prefetchb_size = XML->sys.L3[ithCache].buffer_sizes[2]; + this->wbb_size = XML->sys.L3[ithCache].buffer_sizes[3]; + this->duty_cycle = XML->sys.L3[ithCache].duty_cycle; + if (!XML->sys.L2[ithCache].merged_dir) { + this->dir_ty = NonDir; + } else { + this->dir_ty = SBT; + this->dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; + } +} + +void CacheDynParam::set_params_l1_directory(const ParseXML* XML, const int ithCache) { + this->name = "First Level Directory"; + this->dir_ty = + (enum Dir_type)XML->sys.L1Directory[ithCache].Directory_type; + this->clockRate = XML->sys.L1Directory[ithCache].clockrate; + this->clockRate *= 1e6; + this->executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + this->capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; + this->blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; + this->assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; + this->nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; + this->throughput = + XML->sys.L1Directory[ithCache].Dir_config[4] / this->clockRate; + this->latency = + XML->sys.L1Directory[ithCache].Dir_config[5] / this->clockRate; + this->missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; + this->fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; + this->prefetchb_size = XML->sys.L1Directory[ithCache].buffer_sizes[2]; + this->wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; + this->duty_cycle = 
XML->sys.L1Directory[ithCache].duty_cycle; +} + +void CacheDynParam::set_params_l2_directory(const ParseXML* XML, const int ithCache) { + this->name = "Second Level Directory"; + this->dir_ty = + (enum Dir_type)XML->sys.L2Directory[ithCache].Directory_type; + this->clockRate = XML->sys.L2Directory[ithCache].clockrate; + this->clockRate *= 1e6; + this->executionTime = + XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); + this->capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; + this->blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; + this->assoc = XML->sys.L2Directory[ithCache].Dir_config[2]; + this->nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; + this->throughput = + XML->sys.L2Directory[ithCache].Dir_config[4] / this->clockRate; + this->latency = + XML->sys.L2Directory[ithCache].Dir_config[5] / this->clockRate; + this->missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0]; + this->fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; + this->prefetchb_size = XML->sys.L2Directory[ithCache].buffer_sizes[2]; + this->wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; + this->duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; +} diff --git a/src/cache/cache_param.h b/src/cache/cache_param.h new file mode 100644 index 0000000..9df1d0d --- /dev/null +++ b/src/cache/cache_param.h @@ -0,0 +1,77 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __CACHE_PARAM_H__ +#define __CACHE_PARAM_H__ + +#include "basic_components.h" +#include "XML_Parse.h" +#include "parameter.h" + +#include + +enum cache_level { + L2, + L3, + L1Directory, + L2Directory +}; + +class CacheDynParam { +public: + string name; + enum Dir_type dir_ty; + double clockRate; + double executionTime; + double capacity; + double blockW; + double assoc; + 
double nbanks; + double throughput; + double latency; + double duty_cycle; + double dir_duty_cycle; + // double duty_cycle; + int missb_size; + int fu_size; + int prefetchb_size; + int wbb_size; + double vdd; + double power_gating_vcc; + CacheDynParam(){}; + ~CacheDynParam(){}; + void set_params_l2_cache(const ParseXML* XML, const int ithCache); + void set_params_l3_cache(const ParseXML* XML, const int ithCache); + void set_params_l1_directory(const ParseXML* XML, const int ithCache); + void set_params_l2_directory(const ParseXML* XML, const int ithCache); +}; + +#endif diff --git a/src/cache/sharedcache.cc b/src/cache/sharedcache.cc index 038bb94..775645c 100644 --- a/src/cache/sharedcache.cc +++ b/src/cache/sharedcache.cc @@ -46,18 +46,42 @@ #include #include -SharedCache::SharedCache(ParseXML *XML_interface, - int ithCache_, - InputParameter *interface_ip_, - enum cache_level cacheL_) - : XML(XML_interface), ithCache(ithCache_), interface_ip(*interface_ip_), - cacheL(cacheL_), dir_overhead(0) { - int idx; - int tag, data; - bool is_default, debug; - enum Device_ty device_t; - enum Core_type core_t; - double size, line, assoc, banks; +SharedCache::SharedCache() { + XML = nullptr; + long_channel = false; + power_gating = false; + init_params = false; + init_stats = false; + set_area = false; + cacheL = L2; + ithCache = 0; + dir_overhead = 0.0; + scktRatio = 0.0; + executionTime = 0.0; + + device_t = Core_device; + core_t = OOO; + + debug = false; + is_default = false; + + size = 0.0; + line = 0.0; + assoc = 0.0; + banks = 0.0; +} + +void SharedCache::set_params(const ParseXML* XML, + const int ithCache, + InputParameter* interface_ip_, + const enum cache_level cacheL_) { + int idx = 0; + int tag = 0; + int data = 0; + this->cacheL = cacheL_; + this->interface_ip = *interface_ip_; + this->ithCache = ithCache; + if (cacheL == L2 && XML->sys.Private_L2) { device_t = Core_device; core_t = (enum Core_type)XML->sys.core[ithCache].machine_type; @@ -66,6 +90,124 @@ 
SharedCache::SharedCache(ParseXML *XML_interface, core_t = Inorder; } + switch(cacheL) { + case L2 : { + cachep.set_params_l2_cache(XML, ithCache); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L2[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L2[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L2[ithCache].device_type; + if (XML->sys.Private_L2 && XML->sys.core[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.core[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.core[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.core[ithCache].vdd; + } + if (XML->sys.Private_L2 && XML->sys.core[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.core[ithCache].power_gating_vcc; + } + if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L2[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L2[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L2[ithCache].vdd; + } + if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L2[ithCache].power_gating_vcc; + } + break; + } + case L3 : { + cachep.set_params_l3_cache(XML, ithCache); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L3[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L3[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + 
XML->sys.L3[ithCache].device_type; + if (XML->sys.L3[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L3[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L3[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L3[ithCache].vdd; + } + if (XML->sys.L3[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L3[ithCache].power_gating_vcc; + } + break; + } + case L1Directory : { + cachep.set_params_l1_directory(XML, ithCache); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L1Directory[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L1Directory[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L1Directory[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + XML->sys.L1Directory[ithCache].device_type; + if (XML->sys.L1Directory[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L1Directory[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L1Directory[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L1Directory[ithCache].vdd; + } + if (XML->sys.L1Directory[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L1Directory[ithCache].power_gating_vcc; + } + break; + } + case L2Directory : { + cachep.set_params_l2_directory(XML, ithCache); + interface_ip.data_arr_ram_cell_tech_type = + XML->sys.L2Directory[ithCache].device_type; // long channel device LSTP + interface_ip.data_arr_peri_global_tech_type = + XML->sys.L2Directory[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L2Directory[ithCache].device_type; + interface_ip.tag_arr_peri_global_tech_type = + 
XML->sys.L2Directory[ithCache].device_type; + if (XML->sys.L2Directory[ithCache].vdd > 0) { + interface_ip.specific_hp_vdd = true; + interface_ip.specific_lop_vdd = true; + interface_ip.specific_lstp_vdd = true; + interface_ip.hp_Vdd = XML->sys.L2Directory[ithCache].vdd; + interface_ip.lop_Vdd = XML->sys.L2Directory[ithCache].vdd; + interface_ip.lstp_Vdd = XML->sys.L2Directory[ithCache].vdd; + } + if (XML->sys.L2Directory[ithCache].power_gating_vcc > -1) { + interface_ip.specific_vcc_min = true; + interface_ip.user_defined_vcc_min = + XML->sys.L2Directory[ithCache].power_gating_vcc; + } + break; + } + default : { + std::cerr << "[ SharedCache ] Error: Not a valid Cache Type" << std::endl; + exit(1); + } + } + debug = false; is_default = true; // indication for default setup if (XML->sys.Embedded) { @@ -77,13 +219,13 @@ SharedCache::SharedCache(ParseXML *XML_interface, interface_ip.wire_is_mat_type = 2; interface_ip.wire_os_mat_type = 2; } - set_cache_param(); // All lower level cache are physically indexed and tagged. 
size = cachep.capacity; line = cachep.blockW; assoc = cachep.assoc; banks = cachep.nbanks; + if ((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory)) { assoc = 0; @@ -254,356 +396,35 @@ SharedCache::SharedCache(ParseXML *XML_interface, unicache.wbb->local_result.area); area.set_area(area.get_area() + unicache.wbb->local_result.area); } - // //pipeline - // interface_ip.pipeline_stages = - // int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time)); - // interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + - // llCache.caches.l_ip.tag_w ; pipeLogicCache.init_pipeline(is_default, - // &interface_ip); pipeLogicCache.compute_pipeline(); - - /* - if (!((XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==1) - ||(XML->sys.number_of_dir_levels==1 && - XML->sys.first_level_dir ==2)))//not single level IC and DIC - { - //directory Now assuming one directory per bank, TODO:should change it later - size = XML->sys.L2directory.L2Dir_config[0]; - line = XML->sys.L2directory.L2Dir_config[1]; - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = - debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit - over estimate interface_ip.specific_tag = 0; interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = - 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = - XML->sys.L2directory.L2Dir_config[4]/clockRate; interface_ip.latency = - XML->sys.L2directory.L2Dir_config[5]/clockRate; interface_ip.is_cache - = true; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power - = 0; 
interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = - 1; interface_ip.num_rw_ports = 1;//lower level cache usually has one port. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory.caches.name,"L2 Directory"); - directory.caches.init_cache(&interface_ip); - directory.caches.optimize_array(); - directory.area += directory.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = - (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + - directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.missb.name,"directoryMissB"); - directory.missb.init_cache(&interface_ip); - directory.missb.optimize_array(); - directory.area += directory.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; data - = directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = 
data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.ifb.name,"directoryFillB"); - directory.ifb.init_cache(&interface_ip); - directory.ifb.optimize_array(); - directory.area += directory.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<XML = XML; + init_stats = true; +} - //prefetch buffer - tag = - XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries - to decide wthether to merge. - data = - directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.prefetchb.name,"directoryPrefetchB"); - directory.prefetchb.init_cache(&interface_ip); - directory.prefetchb.optimize_array(); - directory.area += directory.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; data - = directory.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.wbb.name,"directoryWBB"); - directory.wbb.init_cache(&interface_ip); - directory.wbb.optimize_array(); - 
directory.area += directory.wbb.local_result.area; +void SharedCache::computeArea() { + if (!init_params) { + std::cerr << "[ SharedCache ] Error: must set params before calling " + "computeArea()\n"; + exit(1); } + set_area = true; +} - if (XML->sys.number_of_dir_levels ==2 && XML->sys.first_level_dir==0) - { - //first level directory - size = - XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128; line = - int(ceil(XML->sys.domain_size/8.0)); assoc = - XML->sys.L2directory.L2Dir_config[2]; banks = - XML->sys.L2directory.L2Dir_config[3]; tag - = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little - bit over estimate interface_ip.specific_tag = 1; interface_ip.tag_w = - tag; interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = - 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = - XML->sys.L2directory.L2Dir_config[4]/clockRate; interface_ip.latency = - XML->sys.L2directory.L2Dir_config[5]/clockRate; interface_ip.is_cache - = true; interface_ip.obj_func_dyn_energy = 0; interface_ip.obj_func_dyn_power - = 0; interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = - 1; interface_ip.num_rw_ports = 1;//lower level cache usually has one port. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory1.caches.name,"first level Directory"); - directory1.caches.init_cache(&interface_ip); - directory1.caches.optimize_array(); - directory1.area += directory1.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = - (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + - directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = - int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.missb.name,"directory1MissB"); - directory1.missb.init_cache(&interface_ip); - directory1.missb.optimize_array(); - directory1.area += directory1.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; data - = directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = 
interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.ifb.name,"directory1FillB"); - directory1.ifb.init_cache(&interface_ip); - directory1.ifb.optimize_array(); - directory1.area += directory1.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries - to decide wthether to merge. - data = - directory1.caches.l_ip.line_sz;//separate queue to prevent from cache - polution. interface_ip.specific_tag = 1; interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.prefetchb.name,"directory1PrefetchB"); - directory1.prefetchb.init_cache(&interface_ip); - directory1.prefetchb.optimize_array(); - directory1.area += directory1.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - 
///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; data - = directory1.caches.l_ip.line_sz; interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = - XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; interface_ip.assoc - = 0; interface_ip.nbanks = 1; interface_ip.out_w = - interface_ip.line_sz*8; interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.wbb.name,"directoryWBB"); - directory1.wbb.init_cache(&interface_ip); - directory1.wbb.optimize_array(); - directory1.area += directory1.wbb.local_result.area; +void SharedCache::computeStaticPower(bool is_tdp) { + if (!init_params) { + std::cerr << "[ SharedCache ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); } - - if (XML->sys.first_level_dir==1)//IC - { - tag = - XML->sys.physical_address_width + EXTRA_TAG_BITS; data - = int(ceil(XML->sys.domain_size/8.0)); interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = - XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1024; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = - XML->sys.L2[ithCache].L2_config[4]/clockRate; interface_ip.latency = - XML->sys.L2[ithCache].L2_config[5]/clockRate; interface_ip.obj_func_dyn_energy - = 0; interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = - 0; 
interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(inv_dir.caches.name,"inv_dir"); - inv_dir.caches.init_cache(&interface_ip); - inv_dir.caches.optimize_array(); - inv_dir.area = inv_dir.caches.local_result.area; - + if (!set_area) { + std::cerr << "[ SharedCache ] Error: must ComputeArea before calling " + "computeStaticPower()\n"; + exit(1); } -*/ - // //pipeline - // interface_ip.pipeline_stages = - // int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time)); - // interface_ip.per_stage_vector = directory.caches.l_ip.out_w + - // directory.caches.l_ip.tag_w ; pipeLogicDirectory.init_pipeline(is_default, - // &interface_ip); pipeLogicDirectory.compute_pipeline(); - // - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. - // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + - // pipeLogicDirectory.tot_stage_vector; clockNetwork.optimize_wire(); -} - -void SharedCache::computeEnergy(bool is_tdp) { double homenode_data_access = (cachep.dir_ty == SBT) ? 
0.9 : 1.0; if (is_tdp) { if (!((cachep.dir_ty == ST && cacheL == L1Directory) || @@ -927,7 +748,7 @@ void SharedCache::computeEnergy(bool is_tdp) { } } -void SharedCache::displayEnergy(uint32_t indent, bool is_tdp) { +void SharedCache::display(uint32_t indent, bool is_tdp) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); bool long_channel = XML->sys.longer_channel_device; @@ -1215,197 +1036,3 @@ void SharedCache::displayEnergy(uint32_t indent, bool is_tdp) { // //} -void SharedCache::set_cache_param() { - if (cacheL == L2) { - cachep.name = "L2"; - cachep.clockRate = XML->sys.L2[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = - XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); - interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L2[ithCache].device_type; // long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = - XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = - XML->sys.L2[ithCache].device_type; - cachep.capacity = XML->sys.L2[ithCache].L2_config[0]; - cachep.blockW = XML->sys.L2[ithCache].L2_config[1]; - cachep.assoc = XML->sys.L2[ithCache].L2_config[2]; - cachep.nbanks = XML->sys.L2[ithCache].L2_config[3]; - cachep.throughput = XML->sys.L2[ithCache].L2_config[4] / cachep.clockRate; - cachep.latency = XML->sys.L2[ithCache].L2_config[5] / cachep.clockRate; - cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2[ithCache].buffer_sizes[1]; - cachep.prefetchb_size = XML->sys.L2[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) { - cachep.dir_ty = NonDir; - } else { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - - if (XML->sys.Private_L2 && 
XML->sys.core[ithCache].vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.core[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.core[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.core[ithCache].vdd; - } - - if (XML->sys.Private_L2 && XML->sys.core[ithCache].power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = - XML->sys.core[ithCache].power_gating_vcc; - } - if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L2[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L2[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L2[ithCache].vdd; - } - if (!XML->sys.Private_L2 && XML->sys.L2[ithCache].power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = - XML->sys.L2[ithCache].power_gating_vcc; - } - } else if (cacheL == L3) { - cachep.name = "L3"; - cachep.clockRate = XML->sys.L3[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = - XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); - interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L3[ithCache].device_type; // long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = - XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = - XML->sys.L3[ithCache].device_type; - cachep.capacity = XML->sys.L3[ithCache].L3_config[0]; - cachep.blockW = XML->sys.L3[ithCache].L3_config[1]; - cachep.assoc = XML->sys.L3[ithCache].L3_config[2]; - cachep.nbanks = XML->sys.L3[ithCache].L3_config[3]; - cachep.throughput = XML->sys.L3[ithCache].L3_config[4] / cachep.clockRate; - cachep.latency = XML->sys.L3[ithCache].L3_config[5] 
/ cachep.clockRate; - cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; - cachep.prefetchb_size = XML->sys.L3[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L3[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) { - cachep.dir_ty = NonDir; - } else { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - if (XML->sys.L3[ithCache].vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L3[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L3[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L3[ithCache].vdd; - } - - if (XML->sys.L3[ithCache].power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = - XML->sys.L3[ithCache].power_gating_vcc; - } - } else if (cacheL == L1Directory) { - cachep.name = "First Level Directory"; - cachep.dir_ty = - (enum Dir_type)XML->sys.L1Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = - XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); - interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L1Directory[ithCache].device_type; // long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = - XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = - XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = - XML->sys.L1Directory[ithCache].device_type; - cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; - 
cachep.throughput = - XML->sys.L1Directory[ithCache].Dir_config[4] / cachep.clockRate; - cachep.latency = - XML->sys.L1Directory[ithCache].Dir_config[5] / cachep.clockRate; - cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size = XML->sys.L1Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; - - if (XML->sys.L1Directory[ithCache].vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L1Directory[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L1Directory[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L1Directory[ithCache].vdd; - } - - if (XML->sys.L1Directory[ithCache].power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = - XML->sys.L1Directory[ithCache].power_gating_vcc; - } - } else if (cacheL == L2Directory) { - cachep.name = "Second Level Directory"; - cachep.dir_ty = - (enum Dir_type)XML->sys.L2Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = - XML->sys.total_cycles / (XML->sys.target_core_clockrate * 1e6); - interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L2Directory[ithCache].device_type; // long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = - XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = - XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = - XML->sys.L2Directory[ithCache].device_type; - cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; - cachep.assoc = 
XML->sys.L2Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; - cachep.throughput = - XML->sys.L2Directory[ithCache].Dir_config[4] / cachep.clockRate; - cachep.latency = - XML->sys.L2Directory[ithCache].Dir_config[5] / cachep.clockRate; - cachep.missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size = XML->sys.L2Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; - - if (XML->sys.L2Directory[ithCache].vdd > 0) { - interface_ip.specific_hp_vdd = true; - interface_ip.specific_lop_vdd = true; - interface_ip.specific_lstp_vdd = true; - interface_ip.hp_Vdd = XML->sys.L2Directory[ithCache].vdd; - interface_ip.lop_Vdd = XML->sys.L2Directory[ithCache].vdd; - interface_ip.lstp_Vdd = XML->sys.L2Directory[ithCache].vdd; - } - - if (XML->sys.L2Directory[ithCache].power_gating_vcc > -1) { - interface_ip.specific_vcc_min = true; - interface_ip.user_defined_vcc_min = - XML->sys.L2Directory[ithCache].power_gating_vcc; - } - } - // cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35; -} diff --git a/src/cache/sharedcache.h b/src/cache/sharedcache.h index 7aaa4ec..4d03cb3 100644 --- a/src/cache/sharedcache.h +++ b/src/cache/sharedcache.h @@ -35,6 +35,7 @@ #include "XML_Parse.h" #include "area.h" #include "array.h" +#include "cache_param.h" #include "basic_components.h" #include "datacache.h" #include "logic.h" @@ -44,51 +45,47 @@ class SharedCache : public Component { public: - ParseXML *XML; - int ithCache; InputParameter interface_ip; - enum cache_level cacheL; DataCache unicache; // Shared cache CacheDynParam cachep; statsDef homenode_tdp_stats; statsDef homenode_rtp_stats; statsDef homenode_stats_t; - double dir_overhead; - // cache_processor llCache,directory, directory1, inv_dir; - // pipeline pipeLogicCache, 
pipeLogicDirectory; - // clock_network clockNetwork; - double scktRatio, executionTime; - // Component L2Tot, cc, cc1, ccTot; - - SharedCache(ParseXML *XML_interface, - int ithCache_, - InputParameter *interface_ip_, - enum cache_level cacheL_ = L2); - void set_cache_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, bool is_tdp = true); + SharedCache(); + void set_params(const ParseXML *XML, + const int ithCache_, + InputParameter *interface_ip_, + const enum cache_level cacheL_ = L2); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(bool is_tdp = false); + void computeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); ~SharedCache(){}; -}; - -class CCdir : public Component { -public: - ParseXML *XML; +private: + const ParseXML *XML; int ithCache; - InputParameter interface_ip; - DataCache dc; // Shared cache - ArrayST *shadow_dir; - // cache_processor llCache,directory, directory1, inv_dir; + enum cache_level cacheL; + double dir_overhead; + double scktRatio; + double executionTime; + + bool long_channel; + bool power_gating; + bool init_params; + bool init_stats; + bool set_area; + bool debug; + bool is_default; - // pipeline pipeLogicCache, pipeLogicDirectory; - // clock_network clockNetwork; - double scktRatio, clockRate, executionTime; - Component L2Tot, cc, cc1, ccTot; + double size; + double line; + double assoc; + double banks; - CCdir(ParseXML *XML_interface, int ithCache_, InputParameter *interface_ip_); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, bool is_tdp = true); - ~CCdir(); + enum Device_ty device_t; + enum Core_type core_t; }; #endif /* SHAREDCACHE_H_ */ diff --git a/src/core/core.cc b/src/core/core.cc index 3f48db2..17dcabf 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -61,7 +61,10 @@ Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) set_core_param(); if 
(XML->sys.Private_L2) { - l2cache = new SharedCache(XML, ithCore, &interface_ip); + l2cache = new SharedCache(); + l2cache->set_params(XML, ithCore, &interface_ip); + l2cache->set_stats(XML); + l2cache->computeArea(); } clockRate = coredynp.clockRate; @@ -224,7 +227,7 @@ void Core::computeEnergy(bool is_tdp) { if (XML->sys.Private_L2) { - l2cache->computeEnergy(is_tdp); + l2cache->computeStaticPower(true); set_pppm(pppm_t, l2cache->cachep.clockRate / clockRate, 1, 1, 1); // l2cache->power = l2cache->power*pppm_t; power = power + l2cache->power * pppm_t; @@ -334,8 +337,7 @@ void Core::computeEnergy(bool is_tdp) { // cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" //<< endl; if (XML->sys.Private_L2) { - - l2cache->computeEnergy(is_tdp); + l2cache->computeStaticPower(); // set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); // l2cache->rt_power = l2cache->rt_power*pppm_t; rt_power = rt_power + l2cache->rt_power; @@ -528,7 +530,7 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // } if (XML->sys.Private_L2) { - l2cache->displayEnergy(4, is_tdp); + l2cache->display(4, true); } } else { diff --git a/src/processor.cc b/src/processor.cc index 0d6d7b0..ab4992e 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -142,9 +142,12 @@ Processor::Processor(ParseXML *XML_interface) if (!XML->sys.Private_L2) { if (numL2 > 0) { for (i = 0; i < numL2; i++) { - l2array.push_back(new SharedCache(XML, i, &interface_ip)); - l2array[i]->computeEnergy(); - l2array[i]->computeEnergy(false); + l2array.push_back(new SharedCache()); + l2array[i]->set_params(XML, i, &interface_ip); + l2array[i]->set_stats(XML); + l2array[i]->computeArea(); + l2array[i]->computeStaticPower(true); + l2array[i]->computeStaticPower(); if (procdynp.homoL2) { l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area() * procdynp.numL2); @@ -190,9 +193,12 @@ Processor::Processor(ParseXML *XML_interface) if (numL3 > 0) { for (i = 0; i < numL3; i++) { - 
l3array.push_back(new SharedCache(XML, i, &interface_ip, L3)); - l3array[i]->computeEnergy(); - l3array[i]->computeEnergy(false); + l3array.push_back(new SharedCache()); + l3array[i]->set_params(XML, i, &interface_ip, L3); + l3array[i]->set_stats(XML); + l3array[i]->computeArea(); + l3array[i]->computeStaticPower(true); + l3array[i]->computeStaticPower(); if (procdynp.homoL3) { l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area() * procdynp.numL3); @@ -233,9 +239,12 @@ Processor::Processor(ParseXML *XML_interface) } if (numL1Dir > 0) { for (i = 0; i < numL1Dir; i++) { - l1dirarray.push_back(new SharedCache(XML, i, &interface_ip, L1Directory)); - l1dirarray[i]->computeEnergy(); - l1dirarray[i]->computeEnergy(false); + l1dirarray.push_back(new SharedCache()); + l1dirarray[i]->set_params(XML, i, &interface_ip, L1Directory); + l1dirarray[i]->set_stats(XML); + l1dirarray[i]->computeArea(); + l1dirarray[i]->computeStaticPower(true); + l1dirarray[i]->computeStaticPower(); if (procdynp.homoL1Dir) { l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area() * procdynp.numL1Dir); @@ -274,9 +283,12 @@ Processor::Processor(ParseXML *XML_interface) } if (numL2Dir > 0) for (i = 0; i < numL2Dir; i++) { - l2dirarray.push_back(new SharedCache(XML, i, &interface_ip, L2Directory)); - l2dirarray[i]->computeEnergy(); - l2dirarray[i]->computeEnergy(false); + l2dirarray.push_back(new SharedCache()); + l2dirarray[i]->set_params(XML, i, &interface_ip, L2Directory); + l2dirarray[i]->set_stats(XML); + l2dirarray[i]->computeArea(); + l2dirarray[i]->computeStaticPower(true); + l2dirarray[i]->computeStaticPower(); if (procdynp.homoL2Dir) { l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area() * procdynp.numL2Dir); @@ -901,26 +913,26 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (!XML->sys.Private_L2) { for (i = 0; i < numL2; i++) { - l2array[i]->displayEnergy(indent + 4, is_tdp); + l2array[i]->display(indent + 
4, is_tdp); cout << "************************************************************" "*****************************" << endl; } } for (i = 0; i < numL3; i++) { - l3array[i]->displayEnergy(indent + 4, is_tdp); + l3array[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL1Dir; i++) { - l1dirarray[i]->displayEnergy(indent + 4, is_tdp); + l1dirarray[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL2Dir; i++) { - l2dirarray[i]->displayEnergy(indent + 4, is_tdp); + l2dirarray[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; From 1eefdef89d004c47aad4d21703cfeb3b9823f6d5 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 16 Jun 2020 20:16:21 -0500 Subject: [PATCH 20/59] Added branchPredictor --- src/core/branch_predictor.cc | 433 ++++++++++++++++++----------------- src/core/branch_predictor.h | 41 ++-- src/core/instfetch.cc | 6 +- 3 files changed, 253 insertions(+), 227 deletions(-) diff --git a/src/core/branch_predictor.cc b/src/core/branch_predictor.cc index c17fd42..a0c6d4e 100644 --- a/src/core/branch_predictor.cc +++ b/src/core/branch_predictor.cc @@ -29,6 +29,7 @@ * ***************************************************************************/ + #include "branch_predictor.h" #include "XML_Parse.h" @@ -43,24 +44,24 @@ #include #include -BranchPredictor::BranchPredictor(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), globalBPT(0), localBPT(0), L1_localBPT(0), - L2_localBPT(0), chooser(0), RAS(0), exist(exist_) { - /* - * Branch Predictor, accessed during ID stage. 
- * McPAT's branch predictor model is the tournament branch predictor used in - * Alpha 21264, including global predictor, local two level predictor, and - * Chooser. The Branch predictor also includes a RAS (return address stack) - * for function calls Branch predictors are tagged by thread ID and modeled as - * 1-way associative cache. However RAS return address stacks are duplicated - * for each thread. - * TODO:Data Width need to be computed more precisely * - */ + + +BranchPredictor::BranchPredictor(){ + init_params = false; + init_stats = false; + long_channel = false; + power_gating = false; +} + +void BranchPredictor::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_){ + + XML = XML_interface; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + ithCore = ithCore_; + + exist = exist_; + if (!exist) return; int tag, data; @@ -100,18 +101,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, - "Global Predictor", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area() + - globalBPT->local_result.area); - area.set_area(area.get_area() + globalBPT->local_result.area); - - // Local BPT (Level 1) + globalBPT.set_params(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); + + //local BPT 1 data = - int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0] / 8.0)); + int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0] / 8.0)); interface_ip.line_sz = data; interface_ip.cache_sz = data * XML->sys.core[ithCore].predictor.local_predictor_entries; @@ -128,14 +122,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = 
coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, + L1_localBPT.set_params(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area() + - L1_localBPT->local_result.area); - area.set_area(area.get_area() + L1_localBPT->local_result.area); // Local BPT (Level 2) data = @@ -156,15 +147,12 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, + L2_localBPT.set_params(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area() + - L2_localBPT->local_result.area); - area.set_area(area.get_area() + L2_localBPT->local_result.area); - + // Chooser data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits / 8.0)); @@ -184,14 +172,11 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, + chooser.set_params(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area() + chooser->local_result.area); - area.set_area(area.get_area() + chooser->local_result.area); - // RAS return address stacks are Duplicated for each thread. 
interface_ip.is_cache = false; interface_ip.pure_ram = true; @@ -212,41 +197,89 @@ BranchPredictor::BranchPredictor(ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST( + RAS.set_params( &interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area() + - RAS->local_result.area * coredynp.num_hthreads); - area.set_area(area.get_area() + - RAS->local_result.area * coredynp.num_hthreads); + + init_params = true; + } -void BranchPredictor::computeEnergy(bool is_tdp) { + + + +void BranchPredictor::computeArea(){ + if (!init_params) { + std::cerr << "[ BranchPredictor ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + + globalBPT.computeArea(); + globalBPT.area.set_area(globalBPT.area.get_area() + + globalBPT.local_result.area); + area.set_area(area.get_area() + globalBPT.local_result.area); + + L1_localBPT.computeArea(); + L1_localBPT.area.set_area(L1_localBPT.area.get_area() + + L1_localBPT.local_result.area); + area.set_area(area.get_area() + L1_localBPT.local_result.area); + + L2_localBPT.computeArea(); + L2_localBPT.area.set_area(L2_localBPT.area.get_area() + + L2_localBPT.local_result.area); + area.set_area(area.get_area() + L2_localBPT.local_result.area); + + chooser.computeArea(); + chooser.area.set_area(chooser.area.get_area() + + chooser.local_result.area); + area.set_area(area.get_area() + chooser.local_result.area); + + RAS.computeArea(); + RAS.area.set_area(RAS.area.get_area() + + RAS.local_result.area*coredynp.num_hthreads); + area.set_area(area.get_area() + RAS.local_result.area *coredynp.num_hthreads); + + +} + + +void BranchPredictor::computeStaticPower() { + // NOTE: this does nothing, as the static power is optimized + // along with the array area. 
+} + +void BranchPredictor::set_stats(const ParseXML *XML, const MCParam &mcp_){ + init_stats = true; +} + +void BranchPredictor::computeDynamicPower(bool is_tdp){ if (!exist) return; double r_access; - double w_access; + double w_access; if (is_tdp) { r_access = coredynp.predictionW * coredynp.BR_duty_cycle; w_access = 0 * coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; + globalBPT.stats_t.readAc.access = r_access; + globalBPT.stats_t.writeAc.access = w_access; + globalBPT.tdp_stats = globalBPT.stats_t; - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; + L1_localBPT.stats_t.readAc.access = r_access; + L1_localBPT.stats_t.writeAc.access = w_access; + L1_localBPT.tdp_stats = L1_localBPT.stats_t; - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; + L2_localBPT.stats_t.readAc.access = r_access; + L2_localBPT.stats_t.writeAc.access = w_access; + L2_localBPT.tdp_stats = L2_localBPT.stats_t; - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; + chooser.stats_t.readAc.access = r_access; + chooser.stats_t.writeAc.access = w_access; + chooser.tdp_stats = chooser.stats_t; - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; + RAS.stats_t.readAc.access = r_access; + RAS.stats_t.writeAc.access = w_access; + RAS.tdp_stats = RAS.stats_t; } else { // The resolution of BPT accesses is coarse, but this is // because most simulators cannot track finer grained details @@ -255,88 +288,89 @@ void BranchPredictor::computeEnergy(bool is_tdp) { XML->sys.core[ithCore].branch_mispredictions + 0.1 * XML->sys.core[ithCore] .branch_instructions; // 
10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; + globalBPT.stats_t.readAc.access = r_access; + globalBPT.stats_t.writeAc.access = w_access; + globalBPT.rtp_stats = globalBPT.stats_t; - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; + L1_localBPT.stats_t.readAc.access = r_access; + L1_localBPT.stats_t.writeAc.access = w_access; + L1_localBPT.rtp_stats = L1_localBPT.stats_t; - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; + L2_localBPT.stats_t.readAc.access = r_access; + L2_localBPT.stats_t.writeAc.access = w_access; + L2_localBPT.rtp_stats = L2_localBPT.stats_t; - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; + chooser.stats_t.readAc.access = r_access; + chooser.stats_t.writeAc.access = w_access; + chooser.rtp_stats = chooser.stats_t; - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; + RAS.stats_t.readAc.access = XML->sys.core[ithCore].function_calls; + RAS.stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; + RAS.rtp_stats = RAS.stats_t; } - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += - globalBPT->local_result.power.readOp.dynamic * - globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access * - globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += - L1_localBPT->local_result.power.readOp.dynamic * - L1_localBPT->stats_t.readAc.access + - 
L1_localBPT->stats_t.writeAc.access * - L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += - L2_localBPT->local_result.power.readOp.dynamic * - L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access * - L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += - chooser->local_result.power.readOp.dynamic * - chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access * - chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += - RAS->local_result.power.readOp.dynamic * RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access * RAS->local_result.power.writeOp.dynamic; + globalBPT.power_t.reset(); + L1_localBPT.power_t.reset(); + L2_localBPT.power_t.reset(); + chooser.power_t.reset(); + RAS.power_t.reset(); + + globalBPT.power_t.readOp.dynamic += + globalBPT.local_result.power.readOp.dynamic * + globalBPT.stats_t.readAc.access + + globalBPT.stats_t.writeAc.access * + globalBPT.local_result.power.writeOp.dynamic; + L1_localBPT.power_t.readOp.dynamic += + L1_localBPT.local_result.power.readOp.dynamic * + L1_localBPT.stats_t.readAc.access + + L1_localBPT.stats_t.writeAc.access * + L1_localBPT.local_result.power.writeOp.dynamic; + + L2_localBPT.power_t.readOp.dynamic += + L2_localBPT.local_result.power.readOp.dynamic * + L2_localBPT.stats_t.readAc.access + + L2_localBPT.stats_t.writeAc.access * + L2_localBPT.local_result.power.writeOp.dynamic; + + chooser.power_t.readOp.dynamic += + chooser.local_result.power.readOp.dynamic * + chooser.stats_t.readAc.access + + chooser.stats_t.writeAc.access * + chooser.local_result.power.writeOp.dynamic; + RAS.power_t.readOp.dynamic += + RAS.local_result.power.readOp.dynamic * RAS.stats_t.readAc.access + + RAS.stats_t.writeAc.access * RAS.local_result.power.writeOp.dynamic; if (is_tdp) { - globalBPT->power = - globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; - L1_localBPT->power = - L1_localBPT->power_t 
+ L1_localBPT->local_result.power * pppm_lkg; - L2_localBPT->power = - L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power * pppm_lkg; - RAS->power = - RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; - - power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + - chooser->power + RAS->power; + globalBPT.power = + globalBPT.power_t + globalBPT.local_result.power * pppm_lkg; + L1_localBPT.power = + L1_localBPT.power_t + L1_localBPT.local_result.power * pppm_lkg; + L2_localBPT.power = + L2_localBPT.power_t + L2_localBPT.local_result.power * pppm_lkg; + chooser.power = chooser.power_t + chooser.local_result.power * pppm_lkg; + RAS.power = + RAS.power_t + RAS.local_result.power * coredynp.pppm_lkg_multhread; + + power = power + globalBPT.power + L1_localBPT.power + L2_localBPT.power + + chooser.power + RAS.power; } else { - globalBPT->rt_power = - globalBPT->power_t + globalBPT->local_result.power * pppm_lkg; - L1_localBPT->rt_power = - L1_localBPT->power_t + L1_localBPT->local_result.power * pppm_lkg; - L2_localBPT->rt_power = - L2_localBPT->power_t + L2_localBPT->local_result.power * pppm_lkg; - chooser->rt_power = - chooser->power_t + chooser->local_result.power * pppm_lkg; - RAS->rt_power = - RAS->power_t + RAS->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + - L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power; + globalBPT.rt_power = + globalBPT.power_t + globalBPT.local_result.power * pppm_lkg; + L1_localBPT.rt_power = + L1_localBPT.power_t + L1_localBPT.local_result.power * pppm_lkg; + L2_localBPT.rt_power = + L2_localBPT.power_t + L2_localBPT.local_result.power * pppm_lkg; + chooser.rt_power = + chooser.power_t + chooser.local_result.power * pppm_lkg; + RAS.rt_power = + RAS.power_t + RAS.local_result.power * coredynp.pppm_lkg_multhread; + rt_power = rt_power + 
globalBPT.rt_power + L1_localBPT.rt_power + + L2_localBPT.rt_power + chooser.rt_power + RAS.rt_power; } } + void BranchPredictor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (!exist) return; @@ -346,175 +380,150 @@ void BranchPredictor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { bool power_gating = XML->sys.power_gating; if (is_tdp) { cout << indent_str << "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << globalBPT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << globalBPT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << globalBPT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? globalBPT->power.readOp.longer_channel_leakage - : globalBPT->power.readOp.leakage) + << (long_channel ? globalBPT.power.readOp.longer_channel_leakage + : globalBPT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? globalBPT->power.readOp + << (long_channel ? 
globalBPT.power.readOp .power_gated_with_long_channel_leakage - : globalBPT->power.readOp.power_gated_leakage) + : globalBPT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << globalBPT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << globalBPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << globalBPT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Local Predictor:" << endl; cout << indent_str << "L1_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << L1_localBPT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << L1_localBPT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? L1_localBPT->power.readOp.longer_channel_leakage - : L1_localBPT->power.readOp.leakage) + << (long_channel ? L1_localBPT.power.readOp.longer_channel_leakage + : L1_localBPT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? L1_localBPT->power.readOp + << (long_channel ? 
L1_localBPT.power.readOp .power_gated_with_long_channel_leakage - : L1_localBPT->power.readOp.power_gated_leakage) + : L1_localBPT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << L1_localBPT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << L1_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << L1_localBPT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "L2_Local Predictor:" << endl; - cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << L2_localBPT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << L2_localBPT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? L2_localBPT->power.readOp.longer_channel_leakage - : L2_localBPT->power.readOp.leakage) + << (long_channel ? L2_localBPT.power.readOp.longer_channel_leakage + : L2_localBPT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? L2_localBPT->power.readOp + << (long_channel ? 
L2_localBPT.power.readOp .power_gated_with_long_channel_leakage - : L2_localBPT->power.readOp.power_gated_leakage) + : L2_localBPT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << L2_localBPT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << L2_localBPT->rt_power.readOp.dynamic / executionTime << " W" + << L2_localBPT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Chooser:" << endl; - cout << indent_str_next << "Area = " << chooser->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << chooser.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << chooser->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << chooser.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? chooser->power.readOp.longer_channel_leakage - : chooser->power.readOp.leakage) + << (long_channel ? chooser.power.readOp.longer_channel_leakage + : chooser.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? chooser->power.readOp.power_gated_with_long_channel_leakage - : chooser->power.readOp.power_gated_leakage) + ? 
chooser.power.readOp.power_gated_with_long_channel_leakage + : chooser.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << chooser.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << chooser->rt_power.readOp.dynamic / executionTime << " W" << endl; + << chooser.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "RAS:" << endl; - cout << indent_str_next << "Area = " << RAS->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << RAS.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << RAS->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << RAS.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? RAS->power.readOp.longer_channel_leakage - : RAS->power.readOp.leakage) + << (long_channel ? RAS.power.readOp.longer_channel_leakage + : RAS.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? RAS->power.readOp.power_gated_with_long_channel_leakage - : RAS->power.readOp.power_gated_leakage) + ? 
RAS.power.readOp.power_gated_with_long_channel_leakage + : RAS.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl; + << "Gate Leakage = " << RAS.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next - << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic / executionTime + << "Runtime Dynamic = " << RAS.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } else { // cout << indent_str_next << "Global Predictor Peak Dynamic = " - //<< globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; + //<< globalBPT.rt_power.readOp.dynamic*clockRate << " W" << endl; // cout << indent_str_next << "Global Predictor Subthreshold Leakage = " - // << globalBPT->rt_power.readOp.leakage <<" W" << endl; cout << + // << globalBPT.rt_power.readOp.leakage <<" W" << endl; cout << // indent_str_next //<< "Global Predictor Gate Leakage = " << - // globalBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // globalBPT.rt_power.readOp.gate_leakage << " W" << endl; cout // << indent_str_next << "Local Predictor Peak Dynamic = " << - // L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // L1_localBPT.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Local Predictor Subthreshold Leakage = " << - // L1_localBPT->rt_power.readOp.leakage << " W" << endl; cout << + // L1_localBPT.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next << "Local Predictor Gate Leakage = " << - // L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl; cout + // L1_localBPT.rt_power.readOp.gate_leakage << " W" << endl; cout // << indent_str_next << "Chooser Peak Dynamic = " << - // chooser->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // chooser.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Chooser Subthreshold Leakage = " << - // chooser->rt_power.readOp.leakage 
<< " W" << endl; cout << + // chooser.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next - //<< "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << + //<< "Chooser Gate Leakage = " << chooser.rt_power.readOp.gate_leakage << //" W" << endl; cout << indent_str_next << "RAS Peak Dynamic = " - //<< RAS->rt_power.readOp.dynamic*clockRate << " W" << endl; + //<< RAS.rt_power.readOp.dynamic*clockRate << " W" << endl; // cout << indent_str_next << "RAS Subthreshold Leakage = " << - // RAS->rt_power.readOp.leakage << " W" << endl; cout << + // RAS.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next - // << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" + // << "RAS Gate Leakage = " << RAS.rt_power.readOp.gate_leakage << " W" //<< endl; } } BranchPredictor ::~BranchPredictor() { - if (!exist) return; - if (globalBPT) { - delete globalBPT; - globalBPT = 0; - } - if (localBPT) { - delete localBPT; - localBPT = 0; - } - if (L1_localBPT) { - delete L1_localBPT; - L1_localBPT = 0; - } - if (L2_localBPT) { - delete L2_localBPT; - L2_localBPT = 0; - } - if (chooser) { - delete chooser; - chooser = 0; - } - if (RAS) { - delete RAS; - RAS = 0; - } -} +} \ No newline at end of file diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h index 8fb5d57..e7b8453 100644 --- a/src/core/branch_predictor.h +++ b/src/core/branch_predictor.h @@ -41,8 +41,8 @@ class BranchPredictor : public Component { public: - ParseXML *XML; int ithCore; + ParseXML *XML; InputParameter interface_ip; CoreDynParam coredynp; double clockRate; @@ -50,22 +50,37 @@ class BranchPredictor : public Component { double scktRatio; double chip_PR_overhead; double macro_PR_overhead; - ArrayST *globalBPT; - ArrayST *localBPT; - ArrayST *L1_localBPT; - ArrayST *L2_localBPT; - ArrayST *chooser; - ArrayST *RAS; + ArrayST globalBPT; + ArrayST localBPT; + ArrayST L1_localBPT; + ArrayST L2_localBPT; + ArrayST chooser; + ArrayST RAS; bool exist; 
- BranchPredictor(ParseXML *XML_interface, + BranchPredictor(); + void set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exsit = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~BranchPredictor(); + bool exist_ = true); + void set_stats(const ParseXML *XML, const MCParam &mcp_); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(bool is_tdp); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~BranchPredictor(); + +private: + + bool init_params; + bool init_stats; + + bool long_channel; + bool power_gating; + + + }; -#endif // __BRANCH_PREDICTOR__ +#endif // __BRANCH_PREDICTOR__ \ No newline at end of file diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index c02c65d..8dafd96 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -361,7 +361,9 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, area.set_area(area.get_area() + BTB->local_result.area); /// cout<<"area="<set_params(XML, ithCore, &interface_ip, coredynp); + BPT->computeArea(); area.set_area(area.get_area() + BPT->area.get_area()); } @@ -538,7 +540,7 @@ void InstFetchU::computeEnergy(bool is_tdp) { BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; - BPT->computeEnergy(is_tdp); + BPT->computeDynamicPower(is_tdp); } if (is_tdp) { From 19ff96ad499fe3b1effb1b900dc1c429e8582dce Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Wed, 17 Jun 2020 00:25:10 -0500 Subject: [PATCH 21/59] mmu done --- src/core/branch_predictor.cc | 9 +- src/core/branch_predictor.h | 5 +- src/core/core.cc | 9 +- src/core/instfetch.cc | 1 + src/core/mmu.cc | 195 ++++++++++++++++++++--------------- src/core/mmu.h | 26 +++-- 6 files changed, 144 insertions(+), 101 deletions(-) diff --git 
a/src/core/branch_predictor.cc b/src/core/branch_predictor.cc index a0c6d4e..9fa2093 100644 --- a/src/core/branch_predictor.cc +++ b/src/core/branch_predictor.cc @@ -49,8 +49,6 @@ BranchPredictor::BranchPredictor(){ init_params = false; init_stats = false; - long_channel = false; - power_gating = false; } void BranchPredictor::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_){ @@ -249,13 +247,18 @@ void BranchPredictor::computeStaticPower() { // along with the array area. } -void BranchPredictor::set_stats(const ParseXML *XML, const MCParam &mcp_){ +void BranchPredictor::set_stats(const ParseXML *XML){ init_stats = true; } void BranchPredictor::computeDynamicPower(bool is_tdp){ if (!exist) return; + if (!init_stats) { + std::cerr << "[ BranchPredictor ] Error: must set params before calling " + "computeDynamicPower()\n"; + exit(1); + } double r_access; double w_access; if (is_tdp) { diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h index e7b8453..0e7ae01 100644 --- a/src/core/branch_predictor.h +++ b/src/core/branch_predictor.h @@ -64,7 +64,7 @@ class BranchPredictor : public Component { InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_ = true); - void set_stats(const ParseXML *XML, const MCParam &mcp_); + void set_stats(const ParseXML *XML); void computeArea(); void computeStaticPower(); void computeDynamicPower(bool is_tdp); @@ -75,9 +75,6 @@ class BranchPredictor : public Component { bool init_params; bool init_stats; - - bool long_channel; - bool power_gating; diff --git a/src/core/core.cc b/src/core/core.cc index 17dcabf..e542bb4 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -71,7 +71,10 @@ Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) executionTime = coredynp.executionTime; ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, 
exit_flag); - mmu = new MemManU(XML, ithCore, &interface_ip, coredynp, exit_flag); + mmu = new MemManU(); + mmu->set_params(XML, ithCore, &interface_ip, coredynp); + mmu->computeArea(); + mmu->set_stats(XML); exu = new EXECU( XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); @@ -146,7 +149,7 @@ void Core::computeEnergy(bool is_tdp) { if (is_tdp) { ifu->computeEnergy(is_tdp); lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); + mmu->computeDynamicPower(is_tdp); exu->computeEnergy(is_tdp); if (coredynp.core_ty == OOO) { @@ -236,7 +239,7 @@ void Core::computeEnergy(bool is_tdp) { } else { ifu->computeEnergy(is_tdp); lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); + mmu->computeDynamicPower(is_tdp); exu->computeEnergy(is_tdp); if (coredynp.core_ty == OOO) { diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 8dafd96..d6d092f 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -364,6 +364,7 @@ InstFetchU::InstFetchU(ParseXML *XML_interface, BPT = new BranchPredictor(); BPT->set_params(XML, ithCore, &interface_ip, coredynp); BPT->computeArea(); + BPT->set_stats(XML); area.set_area(area.get_area() + BPT->area.get_area()); } diff --git a/src/core/mmu.cc b/src/core/mmu.cc index 776b6fb..975d314 100644 --- a/src/core/mmu.cc +++ b/src/core/mmu.cc @@ -43,13 +43,20 @@ #include #include -MemManU::MemManU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), itlb(0), dtlb(0), exist(exist_) { +MemManU::MemManU(){ + init_params = false; + init_stats = false; +} + +void MemManU::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_){ + + XML = XML_interface; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + ithCore = 
ithCore_; + + exist = exist_; + if (!exist) return; int tag, data; @@ -95,10 +102,8 @@ MemManU::MemManU(ParseXML *XML_interface, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST( + itlb.set_params( &interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area() + itlb->local_result.area); - area.set_area(area.get_area() + itlb->local_result.area); // output_data_csv(itlb.tlb.local_result); // dtlb @@ -134,69 +139,103 @@ MemManU::MemManU(ParseXML *XML_interface, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST( + dtlb.set_params( &interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area() + dtlb->local_result.area); - area.set_area(area.get_area() + dtlb->local_result.area); - // output_data_csv(dtlb.tlb.local_result); + + init_params = true; +} + +void MemManU::computeArea(){ + if (!init_params) { + std::cerr << "[ MemManU ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + + dtlb.computeArea(); + dtlb.area.set_area(dtlb.area.get_area() + + dtlb.local_result.area); + area.set_area(area.get_area() + dtlb.local_result.area); + + itlb.computeArea(); + itlb.area.set_area(itlb.area.get_area() + + itlb.local_result.area); + area.set_area(area.get_area() + itlb.local_result.area); + +} + +void MemManU::set_stats(const ParseXML *XML){ + init_stats = true; +} + +void MemManU::computeStaticPower() { + // NOTE: this does nothing, as the static power is optimized + // along with the array area. 
} -void MemManU::computeEnergy(bool is_tdp) { +void MemManU::computeDynamicPower(bool is_tdp){ if (!exist) return; + if (!init_stats) { + std::cerr << "[ MCFrontEnd ] Error: must set params before calling " + "computeDynamicPower()\n"; + exit(1); + } if (is_tdp) { // init stats for Peak - itlb->stats_t.readAc.access = - itlb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = - itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; + itlb.stats_t.readAc.access = + itlb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + itlb.stats_t.readAc.miss = 0; + itlb.stats_t.readAc.hit = + itlb.stats_t.readAc.access - itlb.stats_t.readAc.miss; + itlb.tdp_stats = itlb.stats_t; - dtlb->stats_t.readAc.access = - dtlb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = - dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; + dtlb.stats_t.readAc.access = + dtlb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dtlb.stats_t.readAc.miss = 0; + dtlb.stats_t.readAc.hit = + dtlb.stats_t.readAc.access - dtlb.stats_t.readAc.miss; + dtlb.tdp_stats = dtlb.stats_t; } else { // init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = - itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; + itlb.stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; + itlb.stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; + itlb.stats_t.readAc.hit = + itlb.stats_t.readAc.access - itlb.stats_t.readAc.miss; + itlb.rtp_stats = itlb.stats_t; - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - 
dtlb->stats_t.readAc.hit = - dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; + dtlb.stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; + dtlb.stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; + dtlb.stats_t.readAc.hit = + dtlb.stats_t.readAc.access - dtlb.stats_t.readAc.miss; + dtlb.rtp_stats = dtlb.stats_t; } - itlb->power_t.reset(); - dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += - itlb->stats_t.readAc.access * itlb->local_result.power.searchOp + itlb.power_t.reset(); + dtlb.power_t.reset(); + itlb.power_t.readOp.dynamic += + itlb.stats_t.readAc.access * itlb.local_result.power.searchOp .dynamic // FA spent most power in tag, // so use total access not hits - + itlb->stats_t.readAc.miss * itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += - dtlb->stats_t.readAc.access * dtlb->local_result.power.searchOp + + itlb.stats_t.readAc.miss * itlb.local_result.power.writeOp.dynamic; + dtlb.power_t.readOp.dynamic += + dtlb.stats_t.readAc.access * dtlb.local_result.power.searchOp .dynamic // FA spent most power in tag, // so use total access not hits - + dtlb->stats_t.readAc.miss * dtlb->local_result.power.writeOp.dynamic; + + dtlb.stats_t.readAc.miss * dtlb.local_result.power.writeOp.dynamic; if (is_tdp) { - itlb->power = itlb->power_t + itlb->local_result.power * pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; - power = power + itlb->power + dtlb->power; + itlb.power = itlb.power_t + itlb.local_result.power * pppm_lkg; + dtlb.power = dtlb.power_t + dtlb.local_result.power * pppm_lkg; + power = power + itlb.power + dtlb.power; } else { - itlb->rt_power = itlb->power_t + itlb->local_result.power * pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power * pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; + itlb.rt_power = itlb.power_t + itlb.local_result.power * pppm_lkg; + dtlb.rt_power = dtlb.power_t + 
dtlb.local_result.power * pppm_lkg; + rt_power = rt_power + itlb.rt_power + dtlb.rt_power; } + } void MemManU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { @@ -209,65 +248,65 @@ void MemManU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (is_tdp) { cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << itlb.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << itlb->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << itlb.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? itlb->power.readOp.longer_channel_leakage - : itlb->power.readOp.leakage) + << (long_channel ? itlb.power.readOp.longer_channel_leakage + : itlb.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? itlb->power.readOp.power_gated_with_long_channel_leakage - : itlb->power.readOp.power_gated_leakage) + ? 
itlb.power.readOp.power_gated_with_long_channel_leakage + : itlb.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << itlb.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << itlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + << itlb.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Dtlb:" << endl; - cout << indent_str_next << "Area = " << dtlb->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << dtlb.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << dtlb->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << dtlb.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? dtlb->power.readOp.longer_channel_leakage - : dtlb->power.readOp.leakage) + << (long_channel ? dtlb.power.readOp.longer_channel_leakage + : dtlb.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? dtlb->power.readOp.power_gated_with_long_channel_leakage - : dtlb->power.readOp.power_gated_leakage) + ? 
dtlb.power.readOp.power_gated_with_long_channel_leakage + : dtlb.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << dtlb.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << dtlb->rt_power.readOp.dynamic / executionTime << " W" << endl; + << dtlb.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } else { cout << indent_str_next << "Itlb Peak Dynamic = " - << itlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + << itlb.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next - << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage + << "Itlb Subthreshold Leakage = " << itlb.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage + << "Itlb Gate Leakage = " << itlb.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Dtlb Peak Dynamic = " - << dtlb->rt_power.readOp.dynamic * clockRate << " W" << endl; + << dtlb.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next - << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage + << "Dtlb Subthreshold Leakage = " << dtlb.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage + << "Dtlb Gate Leakage = " << dtlb.rt_power.readOp.gate_leakage << " W" << endl; } } @@ -276,12 +315,4 @@ MemManU ::~MemManU() { if (!exist) return; - if (itlb) { - delete itlb; - itlb = 0; - } - if (dtlb) { - delete dtlb; - dtlb = 0; - } -} +} \ No newline at end of file diff --git a/src/core/mmu.h b/src/core/mmu.h index 19e6312..3b8a7a7 100644 --- a/src/core/mmu.h +++ b/src/core/mmu.h @@ -50,18 +50,26 @@ class MemManU : public Component { double scktRatio; double chip_PR_overhead; double macro_PR_overhead; - ArrayST *itlb; - ArrayST 
*dtlb; + ArrayST itlb; + ArrayST dtlb; bool exist; - MemManU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); + MemManU(); + void set_params(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(bool is_tdp); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~MemManU(); + + private: + bool init_params; + bool init_stats; }; -#endif // __MEMORY_MANAGEMENT_U_H__ +#endif // __MEMORY_MANAGEMENT_U_H__ \ No newline at end of file From 772f0fdd1827742eb9af17333d33e011d6bf6380 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Thu, 18 Jun 2020 13:52:15 -0500 Subject: [PATCH 22/59] Error in Ececute unit --- src/core/exec_unit.cc | 13 +- src/core/regfile.cc | 310 ++++++++++++++++++++++-------------------- src/core/regfile.h | 32 +++-- 3 files changed, 196 insertions(+), 159 deletions(-) diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 3d9a219..794d5fc 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -59,7 +59,12 @@ EXECU::EXECU(ParseXML *XML_interface, double fu_height = 0.0; clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; - rfu = new RegFU(XML, ithCore, &interface_ip, coredynp); + rfu = new RegFU(); + rfu->set_params(XML, ithCore, &interface_ip, coredynp); + rfu->computeArea(); + rfu->set_stats(XML); + rfu->computeStaticPower(); + scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + @@ -413,14 +418,14 @@ void EXECU::computeEnergy(bool is_tdp) { if (!exist) return; double pppm_t[4] = {1, 1, 1, 1}; - // rfu->power.reset(); - 
// rfu->rt_power.reset(); + //rfu->power.reset(); + //rfu->rt_power.reset(); // scheu->power.reset(); // scheu->rt_power.reset(); // exeu->power.reset(); // exeu->rt_power.reset(); - rfu->computeEnergy(is_tdp); + rfu->computeDynamicPower(is_tdp); scheu->computeEnergy(is_tdp); exeu->computeEnergy(is_tdp); if (coredynp.num_fpus > 0) { diff --git a/src/core/regfile.cc b/src/core/regfile.cc index bd77879..5742dd8 100644 --- a/src/core/regfile.cc +++ b/src/core/regfile.cc @@ -43,22 +43,33 @@ #include #include -RegFU::RegFU(ParseXML *XML_interface, +RegFU::RegFU(){ + init_params = false; + init_stats = false; + +} + +void RegFU::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), IRF(0), FRF(0), RFWIN(0), exist(exist_) { + bool exist_){ /* * processors have separate architectural register files for each thread. * therefore, the bypass buses need to travel across all the register files. */ + XML = XML_interface; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + ithCore = ithCore_; + + exist = exist_; if (!exist) return; int data; + clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; //**********************************IRF*************************************** @@ -84,22 +95,11 @@ RegFU::RegFU(ParseXML *XML_interface, interface_ip.num_rd_ports = 2 * coredynp.peak_issueW; interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, + IRF.set_params(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area() + - IRF->local_result.area * coredynp.num_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? 
XML->sys.core[ithCore].number_hardware_threads - : 1)); - area.set_area(area.get_area() + - IRF->local_result.area * coredynp.num_pipelines * cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); // area.set_area(area.get_area()*cdb_overhead); // output_data_csv(IRF.RF.local_result); @@ -126,31 +126,19 @@ RegFU::RegFU(ParseXML *XML_interface, interface_ip.num_rd_ports = 2 * XML->sys.core[ithCore].issue_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, + FRF.set_params(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area() + - FRF->local_result.area * coredynp.num_fp_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); - area.set_area(area.get_area() + - FRF->local_result.area * coredynp.num_fp_pipelines * - cdb_overhead * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1)); // area.set_area(area.get_area()*cdb_overhead); // output_data_csv(FRF.RF.local_result); - int_regfile_height = IRF->local_result.cache_ht * + int_regfile_height = IRF.local_result.cache_ht * ((coredynp.scheu_ty == ReservationStation) ? XML->sys.core[ithCore].number_hardware_threads : 1) * sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht * + fp_regfile_height = FRF.local_result.cache_ht * ((coredynp.scheu_ty == ReservationStation) ? 
XML->sys.core[ithCore].number_hardware_threads : 1) * @@ -168,7 +156,7 @@ RegFU::RegFU(ParseXML *XML_interface, interface_ip.pure_ram = true; interface_ip.line_sz = int(ceil(data / 8.0)); interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size * - IRF->l_ip.cache_sz * + IRF.l_ip.cache_sz * XML->sys.core[ithCore].number_hardware_threads; interface_ip.assoc = 1; interface_ip.nbanks = 1; @@ -186,20 +174,69 @@ RegFU::RegFU(ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, + RFWIN.set_params(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area() + - RFWIN->local_result.area * coredynp.num_pipelines); - area.set_area(area.get_area() + - RFWIN->local_result.area * coredynp.num_pipelines); // output_data_csv(RFWIN.RF.local_result); } + + init_params = true; +} + +void RegFU::computeStaticPower() { + // NOTE: this does nothing, as the static power is optimized + // along with the array area. +} + +void RegFU::set_stats(const ParseXML *XML){ + init_stats = true; } -void RegFU::computeEnergy(bool is_tdp) { +void RegFU::computeArea(){ + + if (!init_params) { + std::cerr << "[ RegFU ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + if (coredynp.regWindowing) { + RFWIN.computeArea(); + RFWIN.area.set_area(RFWIN.area.get_area() + + RFWIN.local_result.area * coredynp.num_pipelines); + area.set_area(area.get_area() + + RFWIN.local_result.area * coredynp.num_pipelines); + } + FRF.computeArea(); + FRF.area.set_area(FRF.area.get_area() + + FRF.local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + FRF.local_result.area * coredynp.num_fp_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1)); + IRF.computeArea(); + IRF.area.set_area(IRF.area.get_area() + + IRF.local_result.area * coredynp.num_pipelines * + cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + area.set_area(area.get_area() + + IRF.local_result.area * coredynp.num_pipelines * cdb_overhead * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1)); + +} +void RegFU::computeDynamicPower(bool is_tdp) { /* * Architecture RF and physical RF cannot be present at the same time. * Therefore, the RF stats can only refer to either ARF or PRF; @@ -209,111 +246,111 @@ void RegFU::computeEnergy(bool is_tdp) { return; if (is_tdp) { // init stats for Peak - IRF->stats_t.readAc.access = + IRF.stats_t.readAc.access = coredynp.issueW * 2 * (coredynp.ALU_duty_cycle * 1.1 + (coredynp.num_muls > 0 ? coredynp.MUL_duty_cycle : 0)) * coredynp.num_pipelines; - IRF->stats_t.writeAc.access = + IRF.stats_t.writeAc.access = coredynp.issueW * (coredynp.ALU_duty_cycle * 1.1 + (coredynp.num_muls > 0 ? 
coredynp.MUL_duty_cycle : 0)) * coredynp.num_pipelines; // Rule of Thumb: about 10% RF related instructions do not need to access // ALUs - IRF->tdp_stats = IRF->stats_t; + IRF.tdp_stats = IRF.stats_t; - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports * + FRF.stats_t.readAc.access = FRF.l_ip.num_rd_ports * coredynp.FPU_duty_cycle * 1.05 * coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports * + FRF.stats_t.writeAc.access = FRF.l_ip.num_wr_ports * coredynp.FPU_duty_cycle * 1.05 * coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; + FRF.tdp_stats = FRF.stats_t; if (coredynp.regWindowing) { - RFWIN->stats_t.readAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0; // 0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; + RFWIN.stats_t.readAc.access = 0; // 0.5*RFWIN.l_ip.num_rw_ports; + RFWIN.stats_t.writeAc.access = 0; // 0.5*RFWIN.l_ip.num_rw_ports; + RFWIN.tdp_stats = RFWIN.stats_t; } } else { // init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = + IRF.stats_t.readAc.access = XML->sys.core[ithCore] .int_regfile_reads; // TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; + IRF.stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; + IRF.rtp_stats = IRF.stats_t; - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; + FRF.stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; + FRF.stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; + FRF.rtp_stats = FRF.stats_t; if (coredynp.regWindowing) { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls * 16; - RFWIN->stats_t.writeAc.access = + RFWIN.stats_t.readAc.access = XML->sys.core[ithCore].function_calls * 16; + RFWIN.stats_t.writeAc.access = 
XML->sys.core[ithCore].function_calls * 16; - RFWIN->rtp_stats = RFWIN->stats_t; + RFWIN.rtp_stats = RFWIN.stats_t; - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + + IRF.stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + XML->sys.core[ithCore].function_calls * 16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + + IRF.stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + XML->sys.core[ithCore].function_calls * 16; - IRF->rtp_stats = IRF->stats_t; + IRF.rtp_stats = IRF.stats_t; - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + + FRF.stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + XML->sys.core[ithCore].function_calls * 16; ; - FRF->stats_t.writeAc.access = + FRF.stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes + XML->sys.core[ithCore].function_calls * 16; ; - FRF->rtp_stats = FRF->stats_t; + FRF.rtp_stats = FRF.stats_t; } } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += - (IRF->stats_t.readAc.access * IRF->local_result.power.readOp.dynamic + - IRF->stats_t.writeAc.access * IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += - (FRF->stats_t.readAc.access * FRF->local_result.power.readOp.dynamic + - FRF->stats_t.writeAc.access * FRF->local_result.power.writeOp.dynamic); + IRF.power_t.reset(); + FRF.power_t.reset(); + IRF.power_t.readOp.dynamic += + (IRF.stats_t.readAc.access * IRF.local_result.power.readOp.dynamic + + IRF.stats_t.writeAc.access * IRF.local_result.power.writeOp.dynamic); + FRF.power_t.readOp.dynamic += + (FRF.stats_t.readAc.access * FRF.local_result.power.readOp.dynamic + + FRF.stats_t.writeAc.access * FRF.local_result.power.writeOp.dynamic); if (coredynp.regWindowing) { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += - (RFWIN->stats_t.readAc.access * - RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access * - 
RFWIN->local_result.power.writeOp.dynamic); + RFWIN.power_t.reset(); + RFWIN.power_t.readOp.dynamic += + (RFWIN.stats_t.readAc.access * + RFWIN.local_result.power.readOp.dynamic + + RFWIN.stats_t.writeAc.access * + RFWIN.local_result.power.writeOp.dynamic); } if (is_tdp) { - IRF->power = IRF->power_t + + IRF.power = IRF.power_t + ((coredynp.scheu_ty == ReservationStation) - ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) - : IRF->local_result.power); - FRF->power = FRF->power_t + + ? (IRF.local_result.power * coredynp.pppm_lkg_multhread) + : IRF.local_result.power); + FRF.power = FRF.power_t + ((coredynp.scheu_ty == ReservationStation) - ? (FRF->local_result.power * coredynp.pppm_lkg_multhread) - : FRF->local_result.power); - power = power + (IRF->power + FRF->power); + ? (FRF.local_result.power * coredynp.pppm_lkg_multhread) + : FRF.local_result.power); + power = power + (IRF.power + FRF.power); if (coredynp.regWindowing) { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; - power = power + RFWIN->power; + RFWIN.power = RFWIN.power_t + RFWIN.local_result.power * pppm_lkg; + power = power + RFWIN.power; } } else { - IRF->rt_power = - IRF->power_t + + IRF.rt_power = + IRF.power_t + ((coredynp.scheu_ty == ReservationStation) - ? (IRF->local_result.power * coredynp.pppm_lkg_multhread) - : IRF->local_result.power); - FRF->rt_power = - FRF->power_t + + ? (IRF.local_result.power * coredynp.pppm_lkg_multhread) + : IRF.local_result.power); + FRF.rt_power = + FRF.power_t + ((coredynp.scheu_ty == ReservationStation) - ? (FRF->local_result.power * coredynp.pppm_lkg_multhread) - : FRF->local_result.power); - rt_power = rt_power + (IRF->power_t + FRF->power_t); + ? 
(FRF.local_result.power * coredynp.pppm_lkg_multhread) + : FRF.local_result.power); + rt_power = rt_power + (IRF.power_t + FRF.power_t); if (coredynp.regWindowing) { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power * pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; + RFWIN.rt_power = RFWIN.power_t + RFWIN.local_result.power * pppm_lkg; + rt_power = rt_power + RFWIN.rt_power; } } } @@ -328,112 +365,97 @@ void RegFU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (is_tdp) { cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << IRF.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << IRF->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << IRF.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? IRF->power.readOp.longer_channel_leakage - : IRF->power.readOp.leakage) + << (long_channel ? IRF.power.readOp.longer_channel_leakage + : IRF.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? IRF->power.readOp.power_gated_with_long_channel_leakage - : IRF->power.readOp.power_gated_leakage) + ? 
IRF.power.readOp.power_gated_with_long_channel_leakage + : IRF.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; + << "Gate Leakage = " << IRF.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next - << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic / executionTime + << "Runtime Dynamic = " << IRF.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Floating Point RF:" << endl; - cout << indent_str_next << "Area = " << FRF->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << FRF.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << FRF->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << FRF.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? FRF->power.readOp.longer_channel_leakage - : FRF->power.readOp.leakage) + << (long_channel ? FRF.power.readOp.longer_channel_leakage + : FRF.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? FRF->power.readOp.power_gated_with_long_channel_leakage - : FRF->power.readOp.power_gated_leakage) + ? 
FRF.power.readOp.power_gated_with_long_channel_leakage + : FRF.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl; + << "Gate Leakage = " << FRF.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next - << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic / executionTime + << "Runtime Dynamic = " << FRF.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (coredynp.regWindowing) { cout << indent_str << "Register Windows:" << endl; - cout << indent_str_next << "Area = " << RFWIN->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << RFWIN.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << RFWIN->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << RFWIN.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? RFWIN->power.readOp.longer_channel_leakage - : RFWIN->power.readOp.leakage) + << (long_channel ? RFWIN.power.readOp.longer_channel_leakage + : RFWIN.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? RFWIN->power.readOp.power_gated_with_long_channel_leakage - : RFWIN->power.readOp.power_gated_leakage) + ? 
RFWIN.power.readOp.power_gated_with_long_channel_leakage + : RFWIN.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << RFWIN.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << RFWIN->rt_power.readOp.dynamic / executionTime << " W" << endl; + << RFWIN.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else { cout << indent_str_next << "Integer RF Peak Dynamic = " - << IRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + << IRF.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Integer RF Subthreshold Leakage = " - << IRF->rt_power.readOp.leakage << " W" << endl; + << IRF.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage + << "Integer RF Gate Leakage = " << IRF.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Floating Point RF Peak Dynamic = " - << FRF->rt_power.readOp.dynamic * clockRate << " W" << endl; + << FRF.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " - << FRF->rt_power.readOp.leakage << " W" << endl; + << FRF.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Floating Point RF Gate Leakage = " - << FRF->rt_power.readOp.gate_leakage << " W" << endl; + << FRF.rt_power.readOp.gate_leakage << " W" << endl; if (coredynp.regWindowing) { cout << indent_str_next << "Register Windows Peak Dynamic = " - << RFWIN->rt_power.readOp.dynamic * clockRate << " W" << endl; + << RFWIN.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Register Windows Subthreshold Leakage = " - << RFWIN->rt_power.readOp.leakage << " W" << endl; + << RFWIN.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Register Windows 
Gate Leakage = " - << RFWIN->rt_power.readOp.gate_leakage << " W" << endl; + << RFWIN.rt_power.readOp.gate_leakage << " W" << endl; } } } RegFU ::~RegFU() { - - if (!exist) - return; - if (IRF) { - delete IRF; - IRF = 0; - } - if (FRF) { - delete FRF; - FRF = 0; - } - if (RFWIN) { - delete RFWIN; - RFWIN = 0; - } } diff --git a/src/core/regfile.h b/src/core/regfile.h index 264874e..5ddf2e3 100644 --- a/src/core/regfile.h +++ b/src/core/regfile.h @@ -52,19 +52,29 @@ class RegFU : public Component { double macro_PR_overhead; double int_regfile_height; double fp_regfile_height; - ArrayST *IRF; - ArrayST *FRF; - ArrayST *RFWIN; + ArrayST IRF; + ArrayST FRF; + ArrayST RFWIN; bool exist; - RegFU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~RegFU(); + RegFU(); + void set_params(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(bool is_tdp); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~RegFU(); + + + private: + + bool init_params; + bool init_stats; }; #endif // __REGFILE_U_H__ From d1947479c6f814497f42e81cb3aa9688d9cdea53 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Thu, 18 Jun 2020 14:06:50 -0500 Subject: [PATCH 23/59] error in bypass in execute unit due to regfile --- src/core/exec_unit.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 794d5fc..566111c 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -14,7 +14,6 @@ * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote 
products derived from * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -61,10 +60,8 @@ EXECU::EXECU(ParseXML *XML_interface, executionTime = coredynp.executionTime; rfu = new RegFU(); rfu->set_params(XML, ithCore, &interface_ip, coredynp); - rfu->computeArea(); rfu->set_stats(XML); - rfu->computeStaticPower(); - + rfu->computeArea(); scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + @@ -418,8 +415,8 @@ void EXECU::computeEnergy(bool is_tdp) { if (!exist) return; double pppm_t[4] = {1, 1, 1, 1}; - //rfu->power.reset(); - //rfu->rt_power.reset(); + // rfu->power.reset(); + // rfu->rt_power.reset(); // scheu->power.reset(); // scheu->rt_power.reset(); // exeu->power.reset(); @@ -672,4 +669,4 @@ EXECU ::~EXECU() { delete scheu; scheu = 0; } -} +} \ No newline at end of file From 4e2f97594502cc01fcb05e28db14298b549c78cd Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Thu, 18 Jun 2020 15:24:25 -0500 Subject: [PATCH 24/59] regfile error fixed --- src/core/exec_unit.cc | 3 ++- src/core/regfile.cc | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 566111c..4cbb7ce 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -60,8 +60,8 @@ EXECU::EXECU(ParseXML *XML_interface, executionTime = coredynp.executionTime; rfu = new RegFU(); rfu->set_params(XML, ithCore, &interface_ip, coredynp); - rfu->set_stats(XML); rfu->computeArea(); + rfu->set_stats(XML); scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); 
area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + @@ -409,6 +409,7 @@ EXECU::EXECU(ParseXML *XML_interface, } } area.set_area(area.get_area() + bypass.area.get_area()); + } void EXECU::computeEnergy(bool is_tdp) { diff --git a/src/core/regfile.cc b/src/core/regfile.cc index 5742dd8..0953325 100644 --- a/src/core/regfile.cc +++ b/src/core/regfile.cc @@ -133,16 +133,7 @@ void RegFU::set_params(ParseXML *XML_interface, coredynp.core_ty); // area.set_area(area.get_area()*cdb_overhead); // output_data_csv(FRF.RF.local_result); - int_regfile_height = IRF.local_result.cache_ht * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1) * - sqrt(cdb_overhead); - fp_regfile_height = FRF.local_result.cache_ht * - ((coredynp.scheu_ty == ReservationStation) - ? XML->sys.core[ithCore].number_hardware_threads - : 1) * - sqrt(cdb_overhead); + // since a EXU is associated with each pipeline, the cdb should not have // longer length. if (coredynp.regWindowing) { @@ -192,6 +183,17 @@ void RegFU::computeStaticPower() { void RegFU::set_stats(const ParseXML *XML){ init_stats = true; + + int_regfile_height = IRF.local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); + fp_regfile_height = FRF.local_result.cache_ht * + ((coredynp.scheu_ty == ReservationStation) + ? 
XML->sys.core[ithCore].number_hardware_threads + : 1) * + sqrt(cdb_overhead); } void RegFU::computeArea(){ From be9217601254991bd4c372813ab0310ab9d5837e Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Thu, 18 Jun 2020 18:26:22 -0500 Subject: [PATCH 25/59] Regfile-Scheduler-Renaming done --- src/core/core.cc | 10 +- src/core/exec_unit.cc | 7 +- src/core/regfile.cc | 6 + src/core/renaming_unit.cc | 880 +++++++++++++++++++++----------------- src/core/renaming_unit.h | 39 +- src/core/scheduler.cc | 426 +++++++++--------- src/core/scheduler.h | 31 +- 7 files changed, 779 insertions(+), 620 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index 17dcabf..b4d7e99 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -76,7 +76,11 @@ Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); if (coredynp.core_ty == OOO) { - rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp); + rnu = new RENAMINGU(); + rnu->set_params(XML, ithCore, &interface_ip, coredynp); + rnu->computeArea(); + rnu->set_stats(XML); + rnu->computeStaticPower(); } corepipe = new Pipeline(&interface_ip, coredynp); @@ -151,7 +155,7 @@ void Core::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeEnergy(is_tdp); + rnu->computeDynamicPower(is_tdp); set_pppm( pppm_t, coredynp.num_pipelines / num_units, @@ -241,7 +245,7 @@ void Core::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeEnergy(is_tdp); + rnu->computeDynamicPower(is_tdp); if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 4cbb7ce..1abe10c 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -62,7 +62,10 @@ 
EXECU::EXECU(ParseXML *XML_interface, rfu->set_params(XML, ithCore, &interface_ip, coredynp); rfu->computeArea(); rfu->set_stats(XML); - scheu = new SchedulerU(XML, ithCore, &interface_ip, coredynp); + scheu = new SchedulerU(); + scheu->set_params(XML, ithCore, &interface_ip, coredynp); + scheu->computeArea(); + scheu->set_stats(XML); exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + scheu->area.get_area()); @@ -424,7 +427,7 @@ void EXECU::computeEnergy(bool is_tdp) { // exeu->rt_power.reset(); rfu->computeDynamicPower(is_tdp); - scheu->computeEnergy(is_tdp); + scheu->computeDynamicPower(is_tdp); exeu->computeEnergy(is_tdp); if (coredynp.num_fpus > 0) { fp_u->computeEnergy(is_tdp); diff --git a/src/core/regfile.cc b/src/core/regfile.cc index 0953325..d3c757e 100644 --- a/src/core/regfile.cc +++ b/src/core/regfile.cc @@ -246,6 +246,12 @@ void RegFU::computeDynamicPower(bool is_tdp) { */ if (!exist) return; + if (!init_stats) { + std::cerr << "[ RegFU ] Error: must set stats before calling " + "computeDynamicPower()\n"; + + exit(1); + } if (is_tdp) { // init stats for Peak IRF.stats_t.readAc.access = diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index f1d6e15..8a0b292 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -43,14 +43,18 @@ #include #include -RENAMINGU::RENAMINGU(ParseXML *XML_interface, +RENAMINGU::RENAMINGU(){ + init_params = false; + init_stats = false; + +} + + +void RENAMINGU::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), iFRAT(0), fFRAT(0), iRRAT(0), fRRAT(0), ifreeL(0), - ffreeL(0), idcl(0), fdcl(0), RAHT(0), exist(exist_) { + bool exist_) { /* * Although renaming logic maybe be used in in-order processors, * McPAT assumes no renaming logic 
is used since the performance gain is very @@ -100,6 +104,13 @@ used for index the RAT entry to be updated. * */ + XML = XML_interface; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + ithCore = ithCore_; + + exist = exist_; + if (!exist) return; int tag, data, out_w; @@ -138,14 +149,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, + iFRAT.set_params(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - // FRAT floating point data = int(ceil(coredynp.phy_freg_width * (1 + coredynp.globalCheckpoint) / 8.0)); @@ -171,14 +179,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, + fFRAT.set_params(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); - } else if (coredynp.rm_ty == CAMbased) { // FRAT tag = coredynp.arch_ireg_width + coredynp.hthread_width; @@ -210,14 +215,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, + iFRAT.set_params(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - // FRAT for FP tag = coredynp.arch_freg_width + coredynp.hthread_width; data = int( @@ -248,13 +250,11 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, + fFRAT.set_params(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); } // RRAT is always RAM based, does not have GCs, and is used only for @@ -290,14 +290,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, + iRRAT.set_params(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); - area.set_area(area.get_area() + iRRAT->area.get_area()); - // RRAT for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); interface_ip.is_cache = false; @@ -322,13 +319,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, + fRRAT.set_params(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); - area.set_area(area.get_area() + fRRAT->area.get_area()); } // Freelist of renaming unit always RAM based and needed for RAM-based // RATs. Although it can be implemented within the CAM-based RAT, Current @@ -362,14 +357,11 @@ used for index the RAT entry to be updated. // every cycle, (coredynp.decodeW -1) inst may need to send back it dest // tags, committW insts needs to update freelist buffers interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, + ifreeL.set_params(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area() + - ifreeL->local_result.area); - area.set_area(area.get_area() + ifreeL->area.get_area()); // freelist for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); @@ -393,14 +385,11 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, + ffreeL.set_params(&interface_ip, "FP Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area() + - ffreeL->local_result.area); - area.set_area(area.get_area() + ffreeL->area.get_area()); idcl = new dep_resource_conflict_check( &interface_ip, @@ -437,19 +426,15 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, + iFRAT.set_params(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->local_result.adjust_area(); - // iFRAT->local_result.power.readOp.dynamic *= + // iFRAT.local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO - // iFRAT->local_result.power.writeOp.dynamic + // iFRAT.local_result.power.writeOp.dynamic //*=1+0.2*0.05;//compensate for GC - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); - // FP data = int(ceil(coredynp.phy_freg_width * (1 + coredynp.globalCheckpoint) / 8.0)); @@ -475,18 +460,16 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, + fFRAT.set_params(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->local_result.adjust_area(); - // fFRAT->local_result.power.readOp.dynamic *= + // fFRAT.local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO - // fFRAT->local_result.power.writeOp.dynamic + // fFRAT.local_result.power.writeOp.dynamic //*=1+0.2*0.05;//compensate for GC - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); + } else if (coredynp.rm_ty == CAMbased) { // FRAT @@ -517,13 +500,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, + iFRAT.set_params(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); - area.set_area(area.get_area() + iFRAT->area.get_area()); // FRAT tag = coredynp.arch_freg_width + coredynp.hthread_width; @@ -554,13 +535,11 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, + fFRAT.set_params(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); - area.set_area(area.get_area() + fFRAT->area.get_area()); } // Although no RRAT for RS based OOO is really needed since the archiRF // always holds the non-speculative data, having the RRAT or GC (not both) @@ -590,14 +569,11 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, + iRRAT.set_params(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); - area.set_area(area.get_area() + iRRAT->area.get_area()); - // RRAT for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); interface_ip.is_cache = false; @@ -622,13 +598,11 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, + fRRAT.set_params(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); - area.set_area(area.get_area() + fRRAT->area.get_area()); } // Freelist of renaming unit of RS based OOO is unifed for both int and fp @@ -654,14 +628,13 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, + ifreeL.set_params(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - // ifreeL->area.set_area(ifreeL->area.get_area()+ - // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area() + ifreeL->area.get_area()); + // ifreeL.area.set_area(ifreeL.area.get_area()+ + // ifreeL.local_result.area*XML->sys.core[ithCore].number_hardware_threads); idcl = new dep_resource_conflict_check( &interface_ip, @@ -683,88 +656,231 @@ used for index the RAT entry to be updated. fdcl = new dep_resource_conflict_check( &interface_ip, coredynp, coredynp.phy_freg_width); } + init_params = true; +} + +void RENAMINGU::computeArea(){ + + if (!init_params) { + std::cerr << "[ RENAMINGU ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + if (coredynp.core_ty == OOO) { + if (coredynp.scheu_ty == PhysicalRegFile) { + if (coredynp.rm_ty == + RAMbased) { // FRAT with global checkpointing (GCs) please see paper + // tech report for detailed explanation. 
+ iFRAT.computeArea(); + iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); + area.set_area(area.get_area() + iFRAT.area.get_area()); + + fFRAT.computeArea(); + fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); + area.set_area(area.get_area() + fFRAT.area.get_area()); + + } else if (coredynp.rm_ty == CAMbased) { + iFRAT.computeArea(); + iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); + area.set_area(area.get_area() + iFRAT.area.get_area()); + + fFRAT.computeArea(); + fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); + area.set_area(area.get_area() + fFRAT.area.get_area()); + } + + // RRAT is always RAM based, does not have GCs, and is used only for + // record latest non-speculative mapping RRAT is not needed for CAM-based + // RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is + // not needed for RAM-based RAT with checkpoints McPAT assumes renaming + // unit to have RRAT when there is no checkpoints in FRAT, while MIPS + // R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with + // FRAT is very costly, especially for high issue width and high clock + // rate. + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT.computeArea(); + iRRAT.area.set_area(iRRAT.area.get_area() + iRRAT.local_result.area); + area.set_area(area.get_area() + iRRAT.area.get_area()); + + fRRAT.computeArea(); + fRRAT.area.set_area(fRRAT.area.get_area() + fRRAT.local_result.area); + area.set_area(area.get_area() + fRRAT.area.get_area()); + } + // Freelist of renaming unit always RAM based and needed for RAM-based + // RATs. Although it can be implemented within the CAM-based RAT, Current + // McPAT does not have the free bits in the CAM but use the same external + // free list as a close approximation for CAM RAT.
Recycle happens at two + // places: 1)when DCL check there are WAW, the Phy-registers/ROB directly + // recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + // therefore num_wr port = decode-1(-1 means at least one phy reg will be + // used for the current renaming group) + commit width + ifreeL.computeArea(); + ifreeL.area.set_area(ifreeL.area.get_area() + + ifreeL.local_result.area); + area.set_area(area.get_area() + ifreeL.area.get_area()); + + ffreeL.computeArea(); + ffreeL.area.set_area(ffreeL.area.get_area() + + ffreeL.local_result.area); + area.set_area(area.get_area() + ffreeL.area.get_area()); + + } else if (coredynp.scheu_ty == ReservationStation) { + if (coredynp.rm_ty == RAMbased) { + iFRAT.computeArea(); + iFRAT.local_result.adjust_area(); + // iFRAT.local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // iFRAT.local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); + area.set_area(area.get_area() + iFRAT.area.get_area()); + + fFRAT.computeArea(); + fFRAT.local_result.adjust_area(); + // fFRAT.local_result.power.readOp.dynamic *= + // 1+0.2*0.05;//1+mis-speculation% TODO + // fFRAT.local_result.power.writeOp.dynamic + //*=1+0.2*0.05;//compensate for GC + fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); + area.set_area(area.get_area() + fFRAT.area.get_area()); + + } else if (coredynp.rm_ty == CAMbased) { + // FRAT + iFRAT.computeArea(); + iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); + area.set_area(area.get_area() + iFRAT.area.get_area()); + + // FRAT + fFRAT.computeArea(); + fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); + area.set_area(area.get_area() + fFRAT.area.get_area()); + } + // Although no RRAT for RS based OOO is really needed since the archiRF + // always holds the non-speculative data, having the RRAT or GC (not 
both) + // can help the recovery of mis-speculations. + + if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { + iRRAT.computeArea(); + iRRAT.area.set_area(iRRAT.area.get_area() + iRRAT.local_result.area); + area.set_area(area.get_area() + iRRAT.area.get_area()); + + // RRAT for FP + fRRAT.computeArea(); + fRRAT.area.set_area(fRRAT.area.get_area() + fRRAT.local_result.area); + area.set_area(area.get_area() + fRRAT.area.get_area()); + } + + // Freelist of renaming unit of RS based OOO is unifed for both int and fp + // renaming unit since the ROB is unified + ifreeL.computeArea(); + // ifreeL.area.set_area(ifreeL.area.get_area()+ + // ifreeL.local_result.area*XML->sys.core[ithCore].number_hardware_threads); + area.set_area(area.get_area() + ifreeL.area.get_area()); + + } + } + +} + +void RENAMINGU::computeStaticPower() { + // NOTE: this does nothing, as the static power is optimized + // along with the array area. } -void RENAMINGU::computeEnergy(bool is_tdp) { +void RENAMINGU::set_stats(const ParseXML *XML){ + init_stats = true; +} + + +void RENAMINGU::computeDynamicPower(bool is_tdp) { if (!exist) return; + if (!init_stats) { + std::cerr << "[ RENAMINGU ] Error: must set stats before calling " + "computeDynamicPower()\n"; + + exit(1); + } double pppm_t[4] = {1, 1, 1, 1}; if (is_tdp) { // init stats for Peak if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_rd_ports; + iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; + iFRAT.tdp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; + fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_rd_ports; + 
fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; + fFRAT.tdp_stats = fFRAT.stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_search_ports; + iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; + iFRAT.tdp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; + fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_search_ports; + fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; + fFRAT.tdp_stats = fFRAT.stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; + iRRAT.stats_t.readAc.access = iRRAT.l_ip.num_rd_ports; + iRRAT.stats_t.writeAc.access = iRRAT.l_ip.num_wr_ports; + iRRAT.tdp_stats = iRRAT.stats_t; - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; + fRRAT.stats_t.readAc.access = fRRAT.l_ip.num_rd_ports; + fRRAT.stats_t.writeAc.access = fRRAT.l_ip.num_wr_ports; + fRRAT.tdp_stats = fRRAT.stats_t; } - ifreeL->stats_t.readAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_rd_ports;; - ifreeL->stats_t.writeAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = - coredynp.decodeW; // ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = - coredynp.decodeW; // ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; + ifreeL.stats_t.readAc.access = + coredynp.decodeW; // ifreeL.l_ip.num_rd_ports;; + ifreeL.stats_t.writeAc.access = + 
coredynp.decodeW; // ifreeL.l_ip.num_wr_ports; + ifreeL.tdp_stats = ifreeL.stats_t; + + ffreeL.stats_t.readAc.access = + coredynp.decodeW; // ffreeL.l_ip.num_rd_ports; + ffreeL.stats_t.writeAc.access = + coredynp.decodeW; // ffreeL.l_ip.num_wr_ports; + ffreeL.tdp_stats = ffreeL.stats_t; } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_rd_ports; + iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; + iFRAT.tdp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; + fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_rd_ports; + fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; + fFRAT.tdp_stats = fFRAT.stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_search_ports; + iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; + iFRAT.tdp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; + fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_search_ports; + fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; + fFRAT.tdp_stats = fFRAT.stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; + iRRAT.stats_t.readAc.access = iRRAT.l_ip.num_rd_ports; + iRRAT.stats_t.writeAc.access = 
iRRAT.l_ip.num_wr_ports; + iRRAT.tdp_stats = iRRAT.stats_t; - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; + fRRAT.stats_t.readAc.access = fRRAT.l_ip.num_rd_ports; + fRRAT.stats_t.writeAc.access = fRRAT.l_ip.num_wr_ports; + fRRAT.tdp_stats = fRRAT.stats_t; } // Unified free list for both int and fp - ifreeL->stats_t.readAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = - coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; + ifreeL.stats_t.readAc.access = + coredynp.decodeW; // ifreeL.l_ip.num_rd_ports; + ifreeL.stats_t.writeAc.access = + coredynp.decodeW; // ifreeL.l_ip.num_wr_ports; + ifreeL.tdp_stats = ifreeL.stats_t; } idcl->stats_t.readAc.access = coredynp.decodeW; fdcl->stats_t.readAc.access = coredynp.decodeW; @@ -783,101 +899,101 @@ void RENAMINGU::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT.rtp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = + fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; + fFRAT.rtp_stats = fFRAT.stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; + 
iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT.rtp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = + fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; + fFRAT.rtp_stats = fFRAT.stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = + iRRAT.stats_t.readAc.access = XML->sys.core[ithCore] .rename_writes; // Hack, should be (context switch + branch // mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; + iRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT.rtp_stats = iRRAT.stats_t; - fRRAT->stats_t.readAc.access = + fRRAT.stats_t.readAc.access = XML->sys.core[ithCore] .fp_rename_writes; // Hack, should be (context switch + branch // mispredictions)*16 - fRRAT->stats_t.writeAc.access = + fRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; + fRRAT.rtp_stats = fRRAT.stats_t; } - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = + ifreeL.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + ifreeL.stats_t.writeAc.access = 2 * XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; + ifreeL.rtp_stats = ifreeL.stats_t; - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = + ffreeL.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + ffreeL.stats_t.writeAc.access = 2 * XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; + ffreeL.rtp_stats = ffreeL.stats_t; } else if 
(coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - // iFRAT->stats_t.searchAc.access = + iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + // iFRAT.stats_t.searchAc.access = // XML->sys.core[ithCore].committed_int_instructions;//hack: not all // committed instructions use regs. - iFRAT->rtp_stats = iFRAT->stats_t; + iFRAT.rtp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = + fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - // fFRAT->stats_t.searchAc.access = + // fFRAT.stats_t.searchAc.access = // XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; + fFRAT.rtp_stats = fFRAT.stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; + iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; + iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iFRAT.rtp_stats = iFRAT.stats_t; - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = + fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; + fFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; + fFRAT.rtp_stats = fFRAT.stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->stats_t.readAc.access = + iRRAT.stats_t.readAc.access = XML->sys.core[ithCore] .rename_writes; // Hack, should be (context switch + 
branch // mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; + iRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; + iRRAT.rtp_stats = iRRAT.stats_t; - fRRAT->stats_t.readAc.access = + fRRAT.stats_t.readAc.access = XML->sys.core[ithCore] .fp_rename_writes; // Hack, should be (context switch + branch // mispredictions)*16 - fRRAT->stats_t.writeAc.access = + fRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; + fRRAT.rtp_stats = fRRAT.stats_t; } // Unified free list for both int and fp since the ROB act as physcial // registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + + ifreeL.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = + ifreeL.stats_t.writeAc.access = 2 * (XML->sys.core[ithCore].rename_writes + XML->sys.core[ithCore] .fp_rename_writes); // HACK: 2-> since some of renaming in // the same group are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; + ifreeL.rtp_stats = ifreeL.stats_t; } idcl->stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * XML->sys.core[ithCore].rename_reads; @@ -900,122 +1016,122 @@ void RENAMINGU::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); + iFRAT.power_t.reset(); + fFRAT.power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.readOp.dynamic + + iFRAT.power_t.readOp.dynamic += + (iFRAT.stats_t.readAc.access * + (iFRAT.local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - 
(fFRAT->local_result.power.readOp.dynamic + + iFRAT.stats_t.writeAc.access * + iFRAT.local_result.power.writeOp.dynamic); + fFRAT.power_t.readOp.dynamic += + (fFRAT.stats_t.readAc.access * + (fFRAT.local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); + fFRAT.stats_t.writeAc.access * + fFRAT.local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.searchOp.dynamic + + iFRAT.power_t.reset(); + fFRAT.power_t.reset(); + iFRAT.power_t.readOp.dynamic += + (iFRAT.stats_t.readAc.access * + (iFRAT.local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.searchOp.dynamic + + iFRAT.stats_t.writeAc.access * + iFRAT.local_result.power.writeOp.dynamic); + fFRAT.power_t.readOp.dynamic += + (fFRAT.stats_t.readAc.access * + (fFRAT.local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); + fFRAT.stats_t.writeAc.access * + fFRAT.local_result.power.writeOp.dynamic); } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += - (iRRAT->stats_t.readAc.access * - iRRAT->local_result.power.readOp.dynamic + - iRRAT->stats_t.writeAc.access * - iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += - (fRRAT->stats_t.readAc.access * - fRRAT->local_result.power.readOp.dynamic + - fRRAT->stats_t.writeAc.access * - fRRAT->local_result.power.writeOp.dynamic); + iRRAT.power_t.reset(); + fRRAT.power_t.reset(); + + 
iRRAT.power_t.readOp.dynamic += + (iRRAT.stats_t.readAc.access * + iRRAT.local_result.power.readOp.dynamic + + iRRAT.stats_t.writeAc.access * + iRRAT.local_result.power.writeOp.dynamic); + fRRAT.power_t.readOp.dynamic += + (fRRAT.stats_t.readAc.access * + fRRAT.local_result.power.readOp.dynamic + + fRRAT.stats_t.writeAc.access * + fRRAT.local_result.power.writeOp.dynamic); } - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += - (ifreeL->stats_t.readAc.access * - ifreeL->local_result.power.readOp.dynamic + - ifreeL->stats_t.writeAc.access * - ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += - (ffreeL->stats_t.readAc.access * - ffreeL->local_result.power.readOp.dynamic + - ffreeL->stats_t.writeAc.access * - ffreeL->local_result.power.writeOp.dynamic); + ifreeL.power_t.reset(); + ffreeL.power_t.reset(); + ifreeL.power_t.readOp.dynamic += + (ifreeL.stats_t.readAc.access * + ifreeL.local_result.power.readOp.dynamic + + ifreeL.stats_t.writeAc.access * + ifreeL.local_result.power.writeOp.dynamic); + ffreeL.power_t.readOp.dynamic += + (ffreeL.stats_t.readAc.access * + ffreeL.local_result.power.readOp.dynamic + + ffreeL.stats_t.writeAc.access * + ffreeL.local_result.power.writeOp.dynamic); } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); + iFRAT.power_t.reset(); + fFRAT.power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.readOp.dynamic + + iFRAT.power_t.readOp.dynamic += + (iFRAT.stats_t.readAc.access * + (iFRAT.local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.readOp.dynamic + + iFRAT.stats_t.writeAc.access * + iFRAT.local_result.power.writeOp.dynamic); + 
fFRAT.power_t.readOp.dynamic += + (fFRAT.stats_t.readAc.access * + (fFRAT.local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); + fFRAT.stats_t.writeAc.access * + fFRAT.local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += - (iFRAT->stats_t.readAc.access * - (iFRAT->local_result.power.searchOp.dynamic + + iFRAT.power_t.reset(); + fFRAT.power_t.reset(); + iFRAT.power_t.readOp.dynamic += + (iFRAT.stats_t.readAc.access * + (iFRAT.local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT->stats_t.writeAc.access * - iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += - (fFRAT->stats_t.readAc.access * - (fFRAT->local_result.power.searchOp.dynamic + + iFRAT.stats_t.writeAc.access * + iFRAT.local_result.power.writeOp.dynamic); + fFRAT.power_t.readOp.dynamic += + (fFRAT.stats_t.readAc.access * + (fFRAT.local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT->stats_t.writeAc.access * - fFRAT->local_result.power.writeOp.dynamic); + fFRAT.stats_t.writeAc.access * + fFRAT.local_result.power.writeOp.dynamic); } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += - (iRRAT->stats_t.readAc.access * - iRRAT->local_result.power.readOp.dynamic + - iRRAT->stats_t.writeAc.access * - iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += - (fRRAT->stats_t.readAc.access * - fRRAT->local_result.power.readOp.dynamic + - fRRAT->stats_t.writeAc.access * - fRRAT->local_result.power.writeOp.dynamic); + iRRAT.power_t.reset(); + fRRAT.power_t.reset(); + + iRRAT.power_t.readOp.dynamic += + (iRRAT.stats_t.readAc.access * + iRRAT.local_result.power.readOp.dynamic + + iRRAT.stats_t.writeAc.access * + 
iRRAT.local_result.power.writeOp.dynamic); + fRRAT.power_t.readOp.dynamic += + (fRRAT.stats_t.readAc.access * + fRRAT.local_result.power.readOp.dynamic + + fRRAT.stats_t.writeAc.access * + fRRAT.local_result.power.writeOp.dynamic); } - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += - (ifreeL->stats_t.readAc.access * - ifreeL->local_result.power.readOp.dynamic + - ifreeL->stats_t.writeAc.access * - ifreeL->local_result.power.writeOp.dynamic); + ifreeL.power_t.reset(); + ifreeL.power_t.readOp.dynamic += + (ifreeL.stats_t.readAc.access * + ifreeL.local_result.power.readOp.dynamic + + ifreeL.stats_t.writeAc.access * + ifreeL.local_result.power.writeOp.dynamic); } } else { @@ -1041,32 +1157,32 @@ void RENAMINGU::computeEnergy(bool is_tdp) { if (is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT->power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; - ffreeL->power = ffreeL->power_t + ffreeL->local_result.power; + iFRAT.power = + iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + fFRAT.power = + fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; + ffreeL.power = ffreeL.power_t + ffreeL.local_result.power; power = power + - (iFRAT->power + fFRAT->power) - //+ (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); + (iFRAT.power + fFRAT.power) + //+ (iRRAT.power + fRRAT.power) + + (ifreeL.power + ffreeL.power); if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; - power = power + (iRRAT->power + fRRAT->power); + iRRAT.power = iRRAT.power_t + iRRAT.local_result.power; + fRRAT.power = fRRAT.power_t + fRRAT.local_result.power; + 
power = power + (iRRAT.power + fRRAT.power); } } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT->power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; - power = power + (iFRAT->power + fFRAT->power) + ifreeL->power; + iFRAT.power = + iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + fFRAT.power = + fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; + power = power + (iFRAT.power + fFRAT.power) + ifreeL.power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; - power = power + (iRRAT->power + fRRAT->power); + iRRAT.power = iRRAT.power_t + iRRAT.local_result.power; + fRRAT.power = fRRAT.power_t + fRRAT.local_result.power; + power = power + (iRRAT.power + fRRAT.power); } } } else { @@ -1076,36 +1192,36 @@ void RENAMINGU::computeEnergy(bool is_tdp) { } else { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT->rt_power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->rt_power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + iFRAT.rt_power = + iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + fFRAT.rt_power = + fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power; + ifreeL.rt_power = ifreeL.power_t + ifreeL.local_result.power; + ffreeL.rt_power = ffreeL.power_t + ffreeL.local_result.power; rt_power = rt_power + - (iFRAT->rt_power + fFRAT->rt_power) - // + (iRRAT->rt_power + - // fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); + 
(iFRAT.rt_power + fFRAT.rt_power) + // + (iRRAT.rt_power + + // fRRAT.rt_power) + + (ifreeL.rt_power + ffreeL.rt_power); if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + iRRAT.rt_power = iRRAT.power_t + iRRAT.local_result.power; + fRRAT.rt_power = fRRAT.power_t + fRRAT.local_result.power; + rt_power = rt_power + (iRRAT.rt_power + fRRAT.rt_power); } } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT->rt_power = - iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; - fFRAT->rt_power = - fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + iFRAT.rt_power = + iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + fFRAT.rt_power = + fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + ifreeL.rt_power = ifreeL.power_t + ifreeL.local_result.power; rt_power = - rt_power + (iFRAT->rt_power + fFRAT->rt_power) + ifreeL->rt_power; + rt_power + (iFRAT.rt_power + fFRAT.rt_power) + ifreeL.rt_power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; - rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); + iRRAT.rt_power = iRRAT.power_t + iRRAT.local_result.power; + fRRAT.rt_power = fRRAT.power_t + fRRAT.local_result.power; + rt_power = rt_power + (iRRAT.rt_power + fRRAT.rt_power); } } } else { @@ -1127,141 +1243,141 @@ void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (coredynp.core_ty == OOO) { cout << indent_str << "Int Front End RAT with " << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << iFRAT->area.get_area() * 1e-6 + 
cout << indent_str_next << "Area = " << iFRAT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << iFRAT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << iFRAT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iFRAT->power.readOp.longer_channel_leakage - : iFRAT->power.readOp.leakage) + << (long_channel ? iFRAT.power.readOp.longer_channel_leakage + : iFRAT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? iFRAT->power.readOp.power_gated_with_long_channel_leakage - : iFRAT->power.readOp.power_gated_leakage) + ? iFRAT.power.readOp.power_gated_with_long_channel_leakage + : iFRAT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << iFRAT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << iFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << iFRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "FP Front End RAT with " << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << fFRAT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << fFRAT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << fFRAT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << fFRAT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fFRAT->power.readOp.longer_channel_leakage - : fFRAT->power.readOp.leakage) + << (long_channel ? 
fFRAT.power.readOp.longer_channel_leakage + : fFRAT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? fFRAT->power.readOp.power_gated_with_long_channel_leakage - : fFRAT->power.readOp.power_gated_leakage) + ? fFRAT.power.readOp.power_gated_with_long_channel_leakage + : fFRAT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << fFRAT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << fFRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Free List:" << endl; - cout << indent_str_next << "Area = " << ifreeL->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ifreeL.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ifreeL->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << ifreeL.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ifreeL->power.readOp.longer_channel_leakage - : ifreeL->power.readOp.leakage) + << (long_channel ? ifreeL.power.readOp.longer_channel_leakage + : ifreeL.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? ifreeL->power.readOp.power_gated_with_long_channel_leakage - : ifreeL->power.readOp.power_gated_leakage) + ? 
ifreeL.power.readOp.power_gated_with_long_channel_leakage + : ifreeL.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ifreeL.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ifreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + << ifreeL.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { cout << indent_str << "Int Retire RAT: " << endl; - cout << indent_str_next << "Area = " << iRRAT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << iRRAT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << iRRAT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << iRRAT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iRRAT->power.readOp.longer_channel_leakage - : iRRAT->power.readOp.leakage) + << (long_channel ? iRRAT.power.readOp.longer_channel_leakage + : iRRAT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? iRRAT->power.readOp + << (long_channel ? 
iRRAT.power.readOp .power_gated_with_long_channel_leakage - : iRRAT->power.readOp.power_gated_leakage) + : iRRAT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << iRRAT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << iRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << iRRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "FP Retire RAT:" << endl; - cout << indent_str_next << "Area = " << fRRAT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << fRRAT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << fRRAT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << fRRAT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fRRAT->power.readOp.longer_channel_leakage - : fRRAT->power.readOp.leakage) + << (long_channel ? fRRAT.power.readOp.longer_channel_leakage + : fRRAT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? fRRAT->power.readOp + << (long_channel ? 
fRRAT.power.readOp .power_gated_with_long_channel_leakage - : fRRAT->power.readOp.power_gated_leakage) + : fRRAT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << fRRAT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << fRRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } if (coredynp.scheu_ty == PhysicalRegFile) { cout << indent_str << "FP Free List:" << endl; - cout << indent_str_next << "Area = " << ffreeL->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ffreeL.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ffreeL->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << ffreeL.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ffreeL->power.readOp.longer_channel_leakage - : ffreeL->power.readOp.leakage) + << (long_channel ? ffreeL.power.readOp.longer_channel_leakage + : ffreeL.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? ffreeL->power.readOp + << (long_channel ? 
ffreeL.power.readOp .power_gated_with_long_channel_leakage - : ffreeL->power.readOp.power_gated_leakage) + : ffreeL.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ffreeL.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ffreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; + << ffreeL.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else { @@ -1307,44 +1423,44 @@ void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } else { if (coredynp.core_ty == OOO) { cout << indent_str_next << "Int Front End RAT Peak Dynamic = " - << iFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + << iFRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " - << iFRAT->rt_power.readOp.leakage << " W" << endl; + << iFRAT.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Int Front End RAT Gate Leakage = " - << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; + << iFRAT.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP Front End RAT Peak Dynamic = " - << fFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + << fFRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " - << fFRAT->rt_power.readOp.leakage << " W" << endl; + << fFRAT.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Front End RAT Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + << fFRAT.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Free List Peak Dynamic = " - << ifreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + << ifreeL.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Free List 
Subthreshold Leakage = " - << ifreeL->rt_power.readOp.leakage << " W" << endl; + << ifreeL.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Free List Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + << fFRAT.rt_power.readOp.gate_leakage << " W" << endl; if (coredynp.scheu_ty == PhysicalRegFile) { if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { cout << indent_str_next << "Int Retire RAT Peak Dynamic = " - << iRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + << iRRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " - << iRRAT->rt_power.readOp.leakage << " W" << endl; + << iRRAT.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Int Retire RAT Gate Leakage = " - << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; + << iRRAT.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP Retire RAT Peak Dynamic = " - << fRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; + << fRRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " - << fRRAT->rt_power.readOp.leakage << " W" << endl; + << fRRAT.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Retire RAT Gate Leakage = " - << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; + << fRRAT.rt_power.readOp.gate_leakage << " W" << endl; } cout << indent_str_next << "FP Free List Peak Dynamic = " - << ffreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; + << ffreeL.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Free List Subthreshold Leakage = " - << ffreeL->rt_power.readOp.leakage << " W" << endl; + << ffreeL.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Free List Gate Leakage = " - << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; + << 
fFRAT.rt_power.readOp.gate_leakage << " W" << endl; } } else { cout << indent_str_next << "Int DCL Peak Dynamic = " @@ -1369,44 +1485,12 @@ RENAMINGU ::~RENAMINGU() { if (!exist) return; - if (iFRAT) { - delete iFRAT; - iFRAT = 0; - } - if (iRRAT) { - delete iRRAT; - iRRAT = 0; - } - if (iFRAT) { - delete iFRAT; - iFRAT = 0; - } - if (ifreeL) { - delete ifreeL; - ifreeL = 0; - } if (idcl) { delete idcl; idcl = 0; } - if (fFRAT) { - delete fFRAT; - fFRAT = 0; - } - if (fRRAT) { - delete fRRAT; - fRRAT = 0; - } if (fdcl) { delete fdcl; fdcl = 0; } - if (ffreeL) { - delete ffreeL; - ffreeL = 0; - } - if (RAHT) { - delete RAHT; - RAHT = 0; - } } diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index 0b2bbba..45e626d 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -47,25 +47,34 @@ class RENAMINGU : public Component { double clockRate; double executionTime; CoreDynParam coredynp; - ArrayST *iFRAT; - ArrayST *fFRAT; - ArrayST *iRRAT; - ArrayST *fRRAT; - ArrayST *ifreeL; - ArrayST *ffreeL; + ArrayST iFRAT; + ArrayST fFRAT; + ArrayST iRRAT; + ArrayST fRRAT; + ArrayST ifreeL; + ArrayST ffreeL; dep_resource_conflict_check *idcl; dep_resource_conflict_check *fdcl; - ArrayST *RAHT; // register alias history table Used to store GC + ArrayST RAHT; // register alias history table Used to store GC bool exist; - RENAMINGU(ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~RENAMINGU(); + RENAMINGU(); + void set_params(ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(bool is_tdp); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = 
true); + ~RENAMINGU(); + + private: + + bool init_params; + bool init_stats; }; #endif // __RENAMING_U_H__ diff --git a/src/core/scheduler.cc b/src/core/scheduler.cc index 9525c5e..4a3cfe6 100644 --- a/src/core/scheduler.cc +++ b/src/core/scheduler.cc @@ -43,14 +43,25 @@ #include #include -SchedulerU::SchedulerU(ParseXML *XML_interface, +SchedulerU::SchedulerU(){ + init_params = false; + init_stats = false; + +} + +void SchedulerU::set_params(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), int_inst_window(0), fp_inst_window(0), ROB(0), - instruction_selection(0), exist(exist_) { + bool exist_){ + + XML = XML_interface; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + ithCore = ithCore_; + + exist = exist_; + if (!exist) return; int tag, data; @@ -95,19 +106,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, + int_inst_window.set_params(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area() + - int_inst_window->local_result.area * - coredynp.num_pipelines); - area.set_area(area.get_area() + - int_inst_window->local_result.area * coredynp.num_pipelines); - // output_data_csv(iRS.RS.local_result); - Iw_height = int_inst_window->local_result.cache_ht; - /* * selection logic * In a single-issue Inorder multithreaded processor like Niagara, issue @@ -188,17 +191,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, + 
int_inst_window.set_params(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area() + - int_inst_window->local_result.area * - coredynp.num_pipelines); - area.set_area(area.get_area() + - int_inst_window->local_result.area * coredynp.num_pipelines); - Iw_height = int_inst_window->local_result.cache_ht; // FU inst window if (coredynp.scheu_ty == PhysicalRegFile) { tag = 2 * coredynp.phy_freg_width; // TODO: each time only half of the tag @@ -240,18 +237,11 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.fp_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, + fp_inst_window.set_params(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area() + - fp_inst_window->local_result.area * - coredynp.num_fp_pipelines); - area.set_area(area.get_area() + fp_inst_window->local_result.area * - coredynp.num_fp_pipelines); - fp_Iw_height = fp_inst_window->local_result.cache_ht; - if (XML->sys.core[ithCore].ROB_size > 0) { /* * if ROB_size = 0, then the target processor does not support @@ -375,18 +365,12 @@ SchedulerU::SchedulerU(ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.peak_issueW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, + ROB.set_params(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area() + - ROB->local_result.area * coredynp.num_pipelines); - area.set_area(area.get_area() + - ROB->local_result.area * coredynp.num_pipelines); - ROB_height = ROB->local_result.cache_ht; } - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, @@ -395,6 +379,72 @@ SchedulerU::SchedulerU(ParseXML 
*XML_interface, Core_device, coredynp.core_ty); } + + init_params = true; +} + +void SchedulerU::computeStaticPower() { + // NOTE: this does nothing, as the static power is optimized + // along with the array area. +} + +void SchedulerU::set_stats(const ParseXML *XML){ + init_stats = true; + if ((coredynp.core_ty == Inorder && coredynp.multithreaded)) { + Iw_height = int_inst_window.local_result.cache_ht; + } + + if (coredynp.core_ty == OOO) { + Iw_height = int_inst_window.local_result.cache_ht; + fp_Iw_height = fp_inst_window.local_result.cache_ht; + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB_height = ROB.local_result.cache_ht; + } + } +} + +void SchedulerU::computeArea(){ + + if (!init_params) { + std::cerr << "[ SchedulerU ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + + if ((coredynp.core_ty == Inorder && coredynp.multithreaded)) { + int_inst_window.computeArea(); + int_inst_window.area.set_area(int_inst_window.area.get_area() + + int_inst_window.local_result.area * + coredynp.num_pipelines); + area.set_area(area.get_area() + + int_inst_window.local_result.area * coredynp.num_pipelines); + } + + if (coredynp.core_ty == OOO) { + int_inst_window.computeArea(); + int_inst_window.area.set_area(int_inst_window.area.get_area() + + int_inst_window.local_result.area * + coredynp.num_pipelines); + area.set_area(area.get_area() + + int_inst_window.local_result.area * coredynp.num_pipelines); + + fp_inst_window.computeArea(); + fp_inst_window.area.set_area(fp_inst_window.area.get_area() + + fp_inst_window.local_result.area * + coredynp.num_fp_pipelines); + area.set_area(area.get_area() + fp_inst_window.local_result.area * + coredynp.num_fp_pipelines); + + if (XML->sys.core[ithCore].ROB_size > 0) { + ROB.computeArea(); + ROB.area.set_area(ROB.area.get_area() + + ROB.local_result.area * coredynp.num_pipelines); + area.set_area(area.get_area() + + ROB.local_result.area * coredynp.num_pipelines); + } + } + } void 
SchedulerU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { @@ -409,146 +459,152 @@ void SchedulerU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (coredynp.core_ty == OOO) { cout << indent_str << "Instruction Window:" << endl; cout << indent_str_next - << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" + << "Area = " << int_inst_window.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " - << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + << int_inst_window.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? int_inst_window->power.readOp.longer_channel_leakage - : int_inst_window->power.readOp.leakage) + ? int_inst_window.power.readOp.longer_channel_leakage + : int_inst_window.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? int_inst_window->power.readOp + ? 
int_inst_window.power.readOp .power_gated_with_long_channel_leakage - : int_inst_window->power.readOp.power_gated_leakage) + : int_inst_window.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << "Gate Leakage = " << int_inst_window.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << int_inst_window.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "FP Instruction Window:" << endl; cout << indent_str_next - << "Area = " << fp_inst_window->area.get_area() * 1e-6 << " mm^2" + << "Area = " << fp_inst_window.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " - << fp_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + << fp_inst_window.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? fp_inst_window->power.readOp.longer_channel_leakage - : fp_inst_window->power.readOp.leakage) + ? fp_inst_window.power.readOp.longer_channel_leakage + : fp_inst_window.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? fp_inst_window->power.readOp + ? 
fp_inst_window.power.readOp .power_gated_with_long_channel_leakage - : fp_inst_window->power.readOp.power_gated_leakage) + : fp_inst_window.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage + << "Gate Leakage = " << fp_inst_window.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fp_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << fp_inst_window.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (XML->sys.core[ithCore].ROB_size > 0) { cout << indent_str << "ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ROB.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ROB->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << ROB.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ROB->power.readOp.longer_channel_leakage - : ROB->power.readOp.leakage) + << (long_channel ? ROB.power.readOp.longer_channel_leakage + : ROB.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? ROB->power.readOp.power_gated_with_long_channel_leakage - : ROB->power.readOp.power_gated_leakage) + ? 
ROB.power.readOp.power_gated_with_long_channel_leakage + : ROB.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ROB.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ROB->rt_power.readOp.dynamic / executionTime << " W" << endl; + << ROB.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else if (coredynp.multithreaded) { cout << indent_str << "Instruction Window:" << endl; cout << indent_str_next - << "Area = " << int_inst_window->area.get_area() * 1e-6 << " mm^2" + << "Area = " << int_inst_window.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " - << int_inst_window->power.readOp.dynamic * clockRate << " W" << endl; + << int_inst_window.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? int_inst_window->power.readOp.longer_channel_leakage - : int_inst_window->power.readOp.leakage) + ? int_inst_window.power.readOp.longer_channel_leakage + : int_inst_window.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? int_inst_window->power.readOp + ? 
int_inst_window.power.readOp .power_gated_with_long_channel_leakage - : int_inst_window->power.readOp.power_gated_leakage) + : int_inst_window.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage + << "Gate Leakage = " << int_inst_window.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << int_inst_window->rt_power.readOp.dynamic / executionTime << " W" + << int_inst_window.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else { if (coredynp.core_ty == OOO) { cout << indent_str_next << "Instruction Window Peak Dynamic = " - << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << int_inst_window.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Instruction Window Subthreshold Leakage = " - << int_inst_window->rt_power.readOp.leakage << " W" << endl; + << int_inst_window.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Instruction Window Gate Leakage = " - << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + << int_inst_window.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP Instruction Window Peak Dynamic = " - << fp_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << fp_inst_window.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " - << fp_inst_window->rt_power.readOp.leakage << " W" << endl; + << fp_inst_window.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Instruction Window Gate Leakage = " - << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + << fp_inst_window.rt_power.readOp.gate_leakage << " W" << endl; if (XML->sys.core[ithCore].ROB_size > 0) { cout << indent_str_next << "ROB Peak Dynamic = " - << ROB->rt_power.readOp.dynamic * clockRate << " W" << endl; + << 
ROB.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next - << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage + << "ROB Subthreshold Leakage = " << ROB.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage + << "ROB Gate Leakage = " << ROB.rt_power.readOp.gate_leakage << " W" << endl; } } else if (coredynp.multithreaded) { cout << indent_str_next << "Instruction Window Peak Dynamic = " - << int_inst_window->rt_power.readOp.dynamic * clockRate << " W" + << int_inst_window.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Instruction Window Subthreshold Leakage = " - << int_inst_window->rt_power.readOp.leakage << " W" << endl; + << int_inst_window.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Instruction Window Gate Leakage = " - << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; + << int_inst_window.rt_power.readOp.gate_leakage << " W" << endl; } } } -void SchedulerU::computeEnergy(bool is_tdp) { +void SchedulerU::computeDynamicPower(bool is_tdp) { if (!exist) return; + if (!init_stats) { + std::cerr << "[ SchedulerU ] Error: must set stats before calling " + "computeDynamicPower()\n"; + + exit(1); + } double ROB_duty_cycle; // ROB_duty_cycle = ((coredynp.ALU_duty_cycle + // coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 @@ -560,29 +616,29 @@ void SchedulerU::computeEnergy(bool is_tdp) { // init stats if (is_tdp) { if (coredynp.core_ty == OOO) { - int_inst_window->stats_t.readAc.access = + int_inst_window.stats_t.readAc.access = coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = + coredynp.num_pipelines; // int_inst_window.l_ip.num_search_ports; + int_inst_window.stats_t.writeAc.access = coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; - 
int_inst_window->stats_t.searchAc.access = + coredynp.num_pipelines; // int_inst_window.l_ip.num_wr_ports; + int_inst_window.stats_t.searchAc.access = coredynp.issueW * coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = - fp_inst_window->l_ip.num_rd_ports * coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = - fp_inst_window->l_ip.num_wr_ports * coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = - fp_inst_window->l_ip.num_search_ports * coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; + int_inst_window.tdp_stats = int_inst_window.stats_t; + fp_inst_window.stats_t.readAc.access = + fp_inst_window.l_ip.num_rd_ports * coredynp.num_fp_pipelines; + fp_inst_window.stats_t.writeAc.access = + fp_inst_window.l_ip.num_wr_ports * coredynp.num_fp_pipelines; + fp_inst_window.stats_t.searchAc.access = + fp_inst_window.l_ip.num_search_ports * coredynp.num_fp_pipelines; + fp_inst_window.tdp_stats = fp_inst_window.stats_t; if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->stats_t.readAc.access = + ROB.stats_t.readAc.access = coredynp.commitW * coredynp.num_pipelines * ROB_duty_cycle; - ROB->stats_t.writeAc.access = + ROB.stats_t.writeAc.access = coredynp.issueW * coredynp.num_pipelines * ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; + ROB.tdp_stats = ROB.stats_t; /* * When inst commits, ROB must be read. 
@@ -599,38 +655,38 @@ void SchedulerU::computeEnergy(bool is_tdp) { } } else if (coredynp.multithreaded) { - int_inst_window->stats_t.readAc.access = + int_inst_window.stats_t.readAc.access = coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = + coredynp.num_pipelines; // int_inst_window.l_ip.num_search_ports; + int_inst_window.stats_t.writeAc.access = coredynp.issueW * - coredynp.num_pipelines; // int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = + coredynp.num_pipelines; // int_inst_window.l_ip.num_wr_ports; + int_inst_window.stats_t.searchAc.access = coredynp.issueW * coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; + int_inst_window.tdp_stats = int_inst_window.stats_t; } } else { // rtp if (coredynp.core_ty == OOO) { - int_inst_window->stats_t.readAc.access = + int_inst_window.stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = + int_inst_window.stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = + int_inst_window.stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = + int_inst_window.rtp_stats = int_inst_window.stats_t; + fp_inst_window.stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = + fp_inst_window.stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = + fp_inst_window.stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; + fp_inst_window.rtp_stats = fp_inst_window.stats_t; if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; 
- ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; + ROB.stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; + ROB.stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; /* ROB need to be updated in RS based OOO when new values are produced, * this update may happen before the commit stage when ROB entry is * released @@ -641,134 +697,134 @@ void SchedulerU::computeEnergy(bool is_tdp) { * stored in ROB, but register tags need to be read out and used to set * the RRAT and to recycle the register tag to free list buffer */ - ROB->rtp_stats = ROB->stats_t; + ROB.rtp_stats = ROB.stats_t; } } else if (coredynp.multithreaded) { - int_inst_window->stats_t.readAc.access = + int_inst_window.stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = + int_inst_window.stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = + int_inst_window.stats_t.searchAc.access = 2 * (XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; + int_inst_window.rtp_stats = int_inst_window.stats_t; } } // computation engine if (coredynp.core_ty == OOO) { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); + int_inst_window.power_t.reset(); + fp_inst_window.power_t.reset(); /* each instruction needs to write to scheduler, read out when all resources * and source operands are ready two search ops with one for each source * operand * */ - int_inst_window->power_t.readOp.dynamic += - int_inst_window->local_result.power.readOp.dynamic * - int_inst_window->stats_t.readAc.access + - int_inst_window->local_result.power.searchOp.dynamic * - int_inst_window->stats_t.searchAc.access + - int_inst_window->local_result.power.writeOp.dynamic * - int_inst_window->stats_t.writeAc.access + - 
int_inst_window->stats_t.readAc.access * + int_inst_window.power_t.readOp.dynamic += + int_inst_window.local_result.power.readOp.dynamic * + int_inst_window.stats_t.readAc.access + + int_inst_window.local_result.power.searchOp.dynamic * + int_inst_window.stats_t.searchAc.access + + int_inst_window.local_result.power.writeOp.dynamic * + int_inst_window.stats_t.writeAc.access + + int_inst_window.stats_t.readAc.access * instruction_selection->power.readOp.dynamic; - fp_inst_window->power_t.readOp.dynamic += - fp_inst_window->local_result.power.readOp.dynamic * - fp_inst_window->stats_t.readAc.access + - fp_inst_window->local_result.power.searchOp.dynamic * - fp_inst_window->stats_t.searchAc.access + - fp_inst_window->local_result.power.writeOp.dynamic * - fp_inst_window->stats_t.writeAc.access + - fp_inst_window->stats_t.writeAc.access * + fp_inst_window.power_t.readOp.dynamic += + fp_inst_window.local_result.power.readOp.dynamic * + fp_inst_window.stats_t.readAc.access + + fp_inst_window.local_result.power.searchOp.dynamic * + fp_inst_window.stats_t.searchAc.access + + fp_inst_window.local_result.power.writeOp.dynamic * + fp_inst_window.stats_t.writeAc.access + + fp_inst_window.stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += - ROB->local_result.power.readOp.dynamic * ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access * ROB->local_result.power.writeOp.dynamic; + ROB.power_t.reset(); + ROB.power_t.readOp.dynamic += + ROB.local_result.power.readOp.dynamic * ROB.stats_t.readAc.access + + ROB.stats_t.writeAc.access * ROB.local_result.power.writeOp.dynamic; } } else if (coredynp.multithreaded) { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += - int_inst_window->local_result.power.readOp.dynamic * - int_inst_window->stats_t.readAc.access + - int_inst_window->local_result.power.searchOp.dynamic * - 
int_inst_window->stats_t.searchAc.access + - int_inst_window->local_result.power.writeOp.dynamic * - int_inst_window->stats_t.writeAc.access + - int_inst_window->stats_t.writeAc.access * + int_inst_window.power_t.reset(); + int_inst_window.power_t.readOp.dynamic += + int_inst_window.local_result.power.readOp.dynamic * + int_inst_window.stats_t.readAc.access + + int_inst_window.local_result.power.searchOp.dynamic * + int_inst_window.stats_t.searchAc.access + + int_inst_window.local_result.power.writeOp.dynamic * + int_inst_window.stats_t.writeAc.access + + int_inst_window.stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; } // assign values if (is_tdp) { if (coredynp.core_ty == OOO) { - int_inst_window->power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * + int_inst_window.power = + int_inst_window.power_t + + (int_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; - fp_inst_window->power = - fp_inst_window->power_t + - (fp_inst_window->local_result.power + instruction_selection->power) * + fp_inst_window.power = + fp_inst_window.power_t + + (fp_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; + power = power + int_inst_window.power + fp_inst_window.power; if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->power = ROB->power_t + ROB->local_result.power * pppm_lkg; - power = power + ROB->power; + ROB.power = ROB.power_t + ROB.local_result.power * pppm_lkg; + power = power + ROB.power; } } else if (coredynp.multithreaded) { // set_pppm(pppm_t, // XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * + int_inst_window.power = + int_inst_window.power_t + + (int_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; - power = power + 
int_inst_window->power; + power = power + int_inst_window.power; } } else { // rtp if (coredynp.core_ty == OOO) { - int_inst_window->rt_power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * + int_inst_window.rt_power = + int_inst_window.power_t + + (int_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; - fp_inst_window->rt_power = - fp_inst_window->power_t + - (fp_inst_window->local_result.power + instruction_selection->power) * + fp_inst_window.rt_power = + fp_inst_window.power_t + + (fp_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; rt_power = - rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; + rt_power + int_inst_window.rt_power + fp_inst_window.rt_power; if (XML->sys.core[ithCore].ROB_size > 0) { - ROB->rt_power = ROB->power_t + ROB->local_result.power * pppm_lkg; - rt_power = rt_power + ROB->rt_power; + ROB.rt_power = ROB.power_t + ROB.local_result.power * pppm_lkg; + rt_power = rt_power + ROB.rt_power; } } else if (coredynp.multithreaded) { // set_pppm(pppm_t, // XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = - int_inst_window->power_t + - (int_inst_window->local_result.power + instruction_selection->power) * + int_inst_window.rt_power = + int_inst_window.power_t + + (int_inst_window.local_result.power + instruction_selection->power) * pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; + rt_power = rt_power + int_inst_window.rt_power; } } // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); // cout<<"Scheduler // power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage< Date: Sat, 20 Jun 2020 13:49:28 -0500 Subject: [PATCH 26/59] experimental merge: Preparing to merge Attempting to merge Ram's progress with the higher level Processor changes. 
--- CMakeLists.txt | 3 +- src/basic_components.h | 1 - src/cache/cache_param.cc | 18 +- src/cache/cache_param.h | 17 +- src/cache/sharedcache.cc | 41 +- src/cache/sharedcache.h | 3 +- src/core/branch_predictor.cc | 2 +- src/core/branch_predictor.h | 4 +- src/core/core.cc | 7 +- src/core/core.h | 6 +- src/core/exec_unit.cc | 2 +- src/core/exec_unit.h | 4 +- src/core/instfetch.cc | 2 +- src/core/instfetch.h | 4 +- src/core/loadstore.cc | 2 +- src/core/loadstore.h | 4 +- src/core/mmu.cc | 2 +- src/core/mmu.h | 4 +- src/core/regfile.cc | 2 +- src/core/regfile.h | 4 +- src/core/renaming_unit.cc | 2 +- src/core/renaming_unit.h | 4 +- src/core/scheduler.cc | 2 +- src/core/scheduler.h | 4 +- src/logic.cc | 4 +- src/logic.h | 8 +- src/noc.cc | 2 +- src/noc.h | 4 +- src/processor.cc | 700 +++++++++++++++++++++-------------- src/processor.h | 22 +- 30 files changed, 514 insertions(+), 370 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf16735..a6c26ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "Create a separate build directory") endif() -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.12) project(mcpat DESCRIPTION "Power Timing Area Calculator" LANGUAGES CXX) @@ -14,7 +14,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) - set (MCPAT_VERSION_MAJOR 1) set (MCPAT_VERSION_MINOR 3) set (MCPAT_VERSION_PATCH 0) diff --git a/src/basic_components.h b/src/basic_components.h index 64e3381..de58c86 100644 --- a/src/basic_components.h +++ b/src/basic_components.h @@ -161,7 +161,6 @@ class CoreDynParam { ~CoreDynParam(){}; }; - class MCParam { public: MCParam(){}; diff --git a/src/cache/cache_param.cc b/src/cache/cache_param.cc index a0e1ad0..4f1a671 100644 --- a/src/cache/cache_param.cc +++ b/src/cache/cache_param.cc @@ -33,7 +33,8 @@ #include -void CacheDynParam::set_params_l2_cache(const ParseXML* XML, const int ithCache) { +void 
CacheDynParam::set_params_l2_cache(const ParseXML *XML, + const int ithCache) { this->name = "L2"; this->clockRate = XML->sys.L2[ithCache].clockrate; this->clockRate *= 1e6; @@ -58,7 +59,8 @@ void CacheDynParam::set_params_l2_cache(const ParseXML* XML, const int ithCache) } } -void CacheDynParam::set_params_l3_cache(const ParseXML* XML, const int ithCache) { +void CacheDynParam::set_params_l3_cache(const ParseXML *XML, + const int ithCache) { this->name = "L3"; this->clockRate = XML->sys.L3[ithCache].clockrate; this->clockRate *= 1e6; @@ -83,10 +85,10 @@ void CacheDynParam::set_params_l3_cache(const ParseXML* XML, const int ithCache) } } -void CacheDynParam::set_params_l1_directory(const ParseXML* XML, const int ithCache) { +void CacheDynParam::set_params_l1_directory(const ParseXML *XML, + const int ithCache) { this->name = "First Level Directory"; - this->dir_ty = - (enum Dir_type)XML->sys.L1Directory[ithCache].Directory_type; + this->dir_ty = (enum Dir_type)XML->sys.L1Directory[ithCache].Directory_type; this->clockRate = XML->sys.L1Directory[ithCache].clockrate; this->clockRate *= 1e6; this->executionTime = @@ -106,10 +108,10 @@ void CacheDynParam::set_params_l1_directory(const ParseXML* XML, const int ithCa this->duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; } -void CacheDynParam::set_params_l2_directory(const ParseXML* XML, const int ithCache) { +void CacheDynParam::set_params_l2_directory(const ParseXML *XML, + const int ithCache) { this->name = "Second Level Directory"; - this->dir_ty = - (enum Dir_type)XML->sys.L2Directory[ithCache].Directory_type; + this->dir_ty = (enum Dir_type)XML->sys.L2Directory[ithCache].Directory_type; this->clockRate = XML->sys.L2Directory[ithCache].clockrate; this->clockRate *= 1e6; this->executionTime = diff --git a/src/cache/cache_param.h b/src/cache/cache_param.h index 9df1d0d..35a4b04 100644 --- a/src/cache/cache_param.h +++ b/src/cache/cache_param.h @@ -32,18 +32,13 @@ #ifndef __CACHE_PARAM_H__ #define 
__CACHE_PARAM_H__ -#include "basic_components.h" #include "XML_Parse.h" +#include "basic_components.h" #include "parameter.h" #include -enum cache_level { - L2, - L3, - L1Directory, - L2Directory -}; +enum cache_level { L2, L3, L1Directory, L2Directory }; class CacheDynParam { public: @@ -68,10 +63,10 @@ class CacheDynParam { double power_gating_vcc; CacheDynParam(){}; ~CacheDynParam(){}; - void set_params_l2_cache(const ParseXML* XML, const int ithCache); - void set_params_l3_cache(const ParseXML* XML, const int ithCache); - void set_params_l1_directory(const ParseXML* XML, const int ithCache); - void set_params_l2_directory(const ParseXML* XML, const int ithCache); + void set_params_l2_cache(const ParseXML *XML, const int ithCache); + void set_params_l3_cache(const ParseXML *XML, const int ithCache); + void set_params_l1_directory(const ParseXML *XML, const int ithCache); + void set_params_l2_directory(const ParseXML *XML, const int ithCache); }; #endif diff --git a/src/cache/sharedcache.cc b/src/cache/sharedcache.cc index 775645c..7168aae 100644 --- a/src/cache/sharedcache.cc +++ b/src/cache/sharedcache.cc @@ -58,7 +58,7 @@ SharedCache::SharedCache() { dir_overhead = 0.0; scktRatio = 0.0; executionTime = 0.0; - + device_t = Core_device; core_t = OOO; @@ -71,12 +71,12 @@ SharedCache::SharedCache() { banks = 0.0; } -void SharedCache::set_params(const ParseXML* XML, +void SharedCache::set_params(const ParseXML *XML, const int ithCache, - InputParameter* interface_ip_, + InputParameter *interface_ip_, const enum cache_level cacheL_) { int idx = 0; - int tag = 0; + int tag = 0; int data = 0; this->cacheL = cacheL_; this->interface_ip = *interface_ip_; @@ -90,16 +90,17 @@ void SharedCache::set_params(const ParseXML* XML, core_t = Inorder; } - switch(cacheL) { - case L2 : { + switch (cacheL) { + case L2: { cachep.set_params_l2_cache(XML, ithCache); interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; // long channel device LSTP 
interface_ip.data_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L2[ithCache].device_type; interface_ip.tag_arr_peri_global_tech_type = - XML->sys.L2[ithCache].device_type; + XML->sys.L2[ithCache].device_type; if (XML->sys.Private_L2 && XML->sys.core[ithCache].vdd > 0) { interface_ip.specific_hp_vdd = true; interface_ip.specific_lop_vdd = true; @@ -108,7 +109,8 @@ void SharedCache::set_params(const ParseXML* XML, interface_ip.lop_Vdd = XML->sys.core[ithCache].vdd; interface_ip.lstp_Vdd = XML->sys.core[ithCache].vdd; } - if (XML->sys.Private_L2 && XML->sys.core[ithCache].power_gating_vcc > -1) { + if (XML->sys.Private_L2 && + XML->sys.core[ithCache].power_gating_vcc > -1) { interface_ip.specific_vcc_min = true; interface_ip.user_defined_vcc_min = XML->sys.core[ithCache].power_gating_vcc; @@ -128,13 +130,14 @@ void SharedCache::set_params(const ParseXML* XML, } break; } - case L3 : { + case L3: { cachep.set_params_l3_cache(XML, ithCache); interface_ip.data_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; // long channel device LSTP interface_ip.data_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; + interface_ip.tag_arr_ram_cell_tech_type = + XML->sys.L3[ithCache].device_type; interface_ip.tag_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; if (XML->sys.L3[ithCache].vdd > 0) { @@ -152,10 +155,11 @@ void SharedCache::set_params(const ParseXML* XML, } break; } - case L1Directory : { + case L1Directory: { cachep.set_params_l1_directory(XML, ithCache); interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L1Directory[ithCache].device_type; // long channel device LSTP + XML->sys.L1Directory[ithCache] + .device_type; // long channel device LSTP interface_ip.data_arr_peri_global_tech_type = 
XML->sys.L1Directory[ithCache].device_type; interface_ip.tag_arr_ram_cell_tech_type = @@ -177,10 +181,11 @@ void SharedCache::set_params(const ParseXML* XML, } break; } - case L2Directory : { + case L2Directory: { cachep.set_params_l2_directory(XML, ithCache); interface_ip.data_arr_ram_cell_tech_type = - XML->sys.L2Directory[ithCache].device_type; // long channel device LSTP + XML->sys.L2Directory[ithCache] + .device_type; // long channel device LSTP interface_ip.data_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; interface_ip.tag_arr_ram_cell_tech_type = @@ -202,7 +207,7 @@ void SharedCache::set_params(const ParseXML* XML, } break; } - default : { + default: { std::cerr << "[ SharedCache ] Error: Not a valid Cache Type" << std::endl; exit(1); } @@ -399,12 +404,11 @@ void SharedCache::set_params(const ParseXML* XML, init_params = true; } -void SharedCache::set_stats(const ParseXML* XML) { +void SharedCache::set_stats(const ParseXML *XML) { this->XML = XML; init_stats = true; } - void SharedCache::computeArea() { if (!init_params) { std::cerr << "[ SharedCache ] Error: must set params before calling " @@ -1035,4 +1039,3 @@ void SharedCache::display(uint32_t indent, bool is_tdp) { //// ///cout<<"maxpower=" < #include -BranchPredictor::BranchPredictor(ParseXML *XML_interface, +BranchPredictor::BranchPredictor(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h index 8fb5d57..887fc8b 100644 --- a/src/core/branch_predictor.h +++ b/src/core/branch_predictor.h @@ -41,7 +41,7 @@ class BranchPredictor : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -58,7 +58,7 @@ class BranchPredictor : public Component { ArrayST *RAS; bool exist; - BranchPredictor(ParseXML *XML_interface, + BranchPredictor(const ParseXML *XML_interface, int ithCore_, 
InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/core.cc b/src/core/core.cc index 17dcabf..7d9d59c 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -44,7 +44,9 @@ #include //#include "globalvar.h" -Core::Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) +Core::Core(const ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), ifu(0), lsu(0), mmu(0), exu(0), rnu(0), corepipe(0), undiffCore(0), l2cache(0) { @@ -361,11 +363,12 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << (long_channel ? power.readOp.longer_channel_leakage : power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str << "Subthreshold Leakage with power gating = " << (long_channel ? power.readOp.power_gated_with_long_channel_leakage : power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; cout << indent_str diff --git a/src/core/core.h b/src/core/core.h index b31c203..3a3ba1c 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -48,7 +48,7 @@ class Core : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; double clockRate, executionTime; @@ -64,7 +64,9 @@ class Core : public Component { CoreDynParam coredynp; // full_decoder inst_decoder; // clock_network clockNetwork; - Core(ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_); + Core(const ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_); void set_core_param(); void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 3d9a219..efd1424 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -43,7 +43,7 @@ #include 
#include -EXECU::EXECU(ParseXML *XML_interface, +EXECU::EXECU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, double lsq_height_, diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index 0199d7d..b73a491 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -43,7 +43,7 @@ class EXECU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; double clockRate; @@ -68,7 +68,7 @@ class EXECU : public Component { Component bypass; bool exist; - EXECU(ParseXML *XML_interface, + EXECU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, double lsq_height_, diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index c02c65d..96f2913 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -43,7 +43,7 @@ #include #include -InstFetchU::InstFetchU(ParseXML *XML_interface, +InstFetchU::InstFetchU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/instfetch.h b/src/core/instfetch.h index e32ff76..089a05b 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -43,7 +43,7 @@ class InstFetchU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -62,7 +62,7 @@ class InstFetchU : public Component { inst_decoder *ID_misc; bool exist; - InstFetchU(ParseXML *XML_interface, + InstFetchU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc index a961130..6545b95 100644 --- a/src/core/loadstore.cc +++ b/src/core/loadstore.cc @@ -43,7 +43,7 @@ #include #include -LoadStoreU::LoadStoreU(ParseXML *XML_interface, +LoadStoreU::LoadStoreU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/loadstore.h 
b/src/core/loadstore.h index 500d32e..fbda670 100644 --- a/src/core/loadstore.h +++ b/src/core/loadstore.h @@ -42,7 +42,7 @@ class LoadStoreU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -59,7 +59,7 @@ class LoadStoreU : public Component { ArrayST *LoadQ; bool exist; - LoadStoreU(ParseXML *XML_interface, + LoadStoreU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/mmu.cc b/src/core/mmu.cc index 776b6fb..278d613 100644 --- a/src/core/mmu.cc +++ b/src/core/mmu.cc @@ -43,7 +43,7 @@ #include #include -MemManU::MemManU(ParseXML *XML_interface, +MemManU::MemManU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/mmu.h b/src/core/mmu.h index 19e6312..f853312 100644 --- a/src/core/mmu.h +++ b/src/core/mmu.h @@ -41,7 +41,7 @@ class MemManU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -54,7 +54,7 @@ class MemManU : public Component { ArrayST *dtlb; bool exist; - MemManU(ParseXML *XML_interface, + MemManU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/regfile.cc b/src/core/regfile.cc index bd77879..e0908b3 100644 --- a/src/core/regfile.cc +++ b/src/core/regfile.cc @@ -43,7 +43,7 @@ #include #include -RegFU::RegFU(ParseXML *XML_interface, +RegFU::RegFU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/regfile.h b/src/core/regfile.h index 264874e..4a75967 100644 --- a/src/core/regfile.h +++ b/src/core/regfile.h @@ -41,7 +41,7 @@ class RegFU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -57,7 +57,7 @@ class 
RegFU : public Component { ArrayST *RFWIN; bool exist; - RegFU(ParseXML *XML_interface, + RegFU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index f1d6e15..e14e89d 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -43,7 +43,7 @@ #include #include -RENAMINGU::RENAMINGU(ParseXML *XML_interface, +RENAMINGU::RENAMINGU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index 0b2bbba..7560a62 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -41,7 +41,7 @@ class RENAMINGU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; double clockRate; @@ -58,7 +58,7 @@ class RENAMINGU : public Component { ArrayST *RAHT; // register alias history table Used to store GC bool exist; - RENAMINGU(ParseXML *XML_interface, + RENAMINGU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/scheduler.cc b/src/core/scheduler.cc index 9525c5e..055523a 100644 --- a/src/core/scheduler.cc +++ b/src/core/scheduler.cc @@ -43,7 +43,7 @@ #include #include -SchedulerU::SchedulerU(ParseXML *XML_interface, +SchedulerU::SchedulerU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/core/scheduler.h b/src/core/scheduler.h index 64cb15e..149f808 100644 --- a/src/core/scheduler.h +++ b/src/core/scheduler.h @@ -41,7 +41,7 @@ class SchedulerU : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -59,7 +59,7 @@ class SchedulerU : public Component { selection_logic *instruction_selection; bool exist; - SchedulerU(ParseXML *XML_interface, + 
SchedulerU(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/logic.cc b/src/logic.cc index cff4164..4d7d24f 100644 --- a/src/logic.cc +++ b/src/logic.cc @@ -537,7 +537,7 @@ void Pipeline::compute_stage_vector() { } } -FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, +FunctionalUnit::FunctionalUnit(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, @@ -1018,7 +1018,7 @@ void FunctionalUnit::leakage_feedback(double temperature) { power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; } -UndiffCore::UndiffCore(ParseXML *XML_interface, +UndiffCore::UndiffCore(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, diff --git a/src/logic.h b/src/logic.h index aea2a52..2fb6d64 100644 --- a/src/logic.h +++ b/src/logic.h @@ -197,7 +197,7 @@ class Pipeline : public Component { class FunctionalUnit : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; @@ -212,7 +212,7 @@ class FunctionalUnit : public Component { statsDef stats_t; powerDef power_t; - FunctionalUnit(ParseXML *XML_interface, + FunctionalUnit(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, @@ -224,13 +224,13 @@ class FunctionalUnit : public Component { class UndiffCore : public Component { public: - UndiffCore(ParseXML *XML_interface, + UndiffCore(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_ = true, bool embedded_ = false); - ParseXML *XML; + const ParseXML *XML; int ithCore; InputParameter interface_ip; CoreDynParam coredynp; diff --git a/src/noc.cc b/src/noc.cc index a58ab8d..1f7831e 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -43,7 +43,7 @@ #include #include -NoC::NoC(ParseXML *XML_interface, +NoC::NoC(const 
ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, double M_traffic_pattern_, diff --git a/src/noc.h b/src/noc.h index 27af3f4..288d334 100644 --- a/src/noc.h +++ b/src/noc.h @@ -41,7 +41,7 @@ class NoC : public Component { public: - ParseXML *XML; + const ParseXML *XML; int ithNoC; InputParameter interface_ip; double link_len; @@ -60,7 +60,7 @@ class NoC : public Component { bool router_exist; string name, link_name; double M_traffic_pattern; - NoC(ParseXML *XML_interface, + NoC(const ParseXML *XML_interface, int ithNoC_, InputParameter *interface_ip_, double M_traffic_pattern_ = 0.6, diff --git a/src/processor.cc b/src/processor.cc index ab4992e..6b4a706 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -45,15 +45,123 @@ #include #include -Processor::Processor(ParseXML *XML_interface) +Processor::Processor(ParseXML *XML_interface, const bool calc_area) : XML(XML_interface) { // TODO: using one global copy may have problems. /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory * controller on chip, thus McPAT only support homogeneous memory controllers. 
*/ + create(XML_interface, calc_area); +} + +void Processor::create(const ParseXML *XML_interface, const bool calc_area) { int i; double pppm_t[4] = {1, 1, 1, 1}; + + this->XML = XML_interface; + init(); + compute_area(calc_area); + compute_power(); + + if (numNOC > 0) { + for (i = 0; i < numNOC; i++) { + if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used + nocs.push_back(new NoC(XML, i, &interface_ip, 1)); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } else { // Bus based interconnect + nocs.push_back( + new NoC(XML, + i, + &interface_ip, + 1, + sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } + } + + /* + * Compute global links associated with each NOC, if any. 
This must be done + * at the end (even after the NOC router part) since the total chip area + * must be obtain to decide the link routing + */ + for (i = 0; i < numNOC; i++) { + if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { + nocs[i]->init_link_bus( + sqrt(area.get_area() * + XML->sys.NoC[i].chip_coverage)); // compute global links + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + } else { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + } + } + } + // Compute energy of NoC (w or w/o links) or buses + for (i = 0; i < numNOC; i++) { + nocs[i]->computeEnergy(); + nocs[i]->computeEnergy(false); + if (procdynp.homoNOC) { + set_pppm(pppm_t, + procdynp.numNOC * nocs[i]->nocdynp.clockRate, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); + noc.power = noc.power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, + 1 / nocs[i]->nocdynp.executionTime, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + power = power + noc.power; + rt_power = rt_power + noc.rt_power; + } else { + set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); + noc.power = noc.power + nocs[i]->power * pppm_t; + power = power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + rt_power = rt_power + nocs[i]->rt_power * pppm_t; + } + } + } + + // //clock power + // globalClock.init_wire_external(is_default, &interface_ip); + // globalClock.clk_area =area*1e6; 
//change it from mm^2 to um^2 + // globalClock.end_wiring_level =5;//toplevel metal + // globalClock.start_wiring_level =5;//toplevel metal + // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local + // final nodes globalClock.optimize_wire(); +} + +void Processor::init() { + int i; set_proc_param(); if (procdynp.homoCore) numCore = procdynp.numCore == 0 ? 0 : 1; @@ -66,8 +174,10 @@ Processor::Processor(ParseXML *XML_interface) numL2 = procdynp.numL2; if (XML->sys.Private_L2 && numCore != numL2) { - cout << "Number of private L2 does not match number of cores" << endl; - exit(0); + std::cerr << "[ Processor ] Error: Number of private L2 does not match " + "number of cores" + << endl; + exit(1); } if (procdynp.homoL3) @@ -80,12 +190,6 @@ Processor::Processor(ParseXML *XML_interface) else numNOC = procdynp.numNOC; - // if (!procdynp.homoNOC) - // { - // cout<<"Current McPAT does not support heterogeneous NOC"<computeEnergy(); - cores[i]->computeEnergy(false); + } + + // L2: + if (!XML->sys.Private_L2) { + if (numL2 > 0) { + for (i = 0; i < numL2; i++) { + l2array.push_back(SharedCache()); + l2array[i].set_params(XML, i, &interface_ip); + l2array[i].set_stats(XML); + } + } + } + + // L3: + if (numL3 > 0) { + for (i = 0; i < numL3; i++) { + l3array.push_back(SharedCache()); + l3array[i].set_params(XML, i, &interface_ip, L3); + l3array[i].set_stats(XML); + } + } + + // L1 Dir: + if (numL1Dir > 0) { + for (i = 0; i < numL1Dir; i++) { + l1dirarray.push_back(SharedCache()); + l1dirarray[i].set_params(XML, i, &interface_ip, L1Directory); + l1dirarray[i].set_stats(XML); + } + } + + // L2 Dir: + if (numL2Dir > 0) { + for (i = 0; i < numL2Dir; i++) { + l2dirarray.push_back(SharedCache()); + l2dirarray[i].set_params(XML, i, &interface_ip, L2Directory); + l2dirarray[i].set_stats(XML); + } + } + + // MC: + if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { + mc.set_params(XML, &interface_ip, MC); + mc.set_stats(XML); + } + + // Flash 
Controller Init: + if (XML->sys.flashc.number_mcs > 0) // flash controller + { + flashcontroller.set_params(XML, &interface_ip); + flashcontroller.set_stats(XML); + } + + // Network Interface Unit Init + if (XML->sys.niu.number_units > 0) { + niu.set_params(XML, &interface_ip); + niu.set_stats(XML); + } + + // PCIE Init + if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { + pcie.set_params(XML, &interface_ip); + pcie.set_stats(XML); + } + + // TODO: Noc Init +} + +void Processor::compute_area(const bool calc_area) { + int i; + double pppm_t[4] = {1, 1, 1, 1}; + + // Compute Area: + for (i = 0; i < numCore; i++) { if (procdynp.homoCore) { core.area.set_area(core.area.get_area() + cores[i]->area.get_area() * procdynp.numCore); + area.set_area(area.get_area() + + core.area.get_area()); // placement and routing overhead is + // 10%, core scales worse than cache + // 40% is accumulated from 90 to 22nm + } else { + core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); + area.set_area( + area.get_area() + + cores[i]->area.get_area()); // placement and routing overhead is 10%, + // core scales worse than cache 40% is + // accumulated from 90 to 22nm + } + } + + // L2 Calc Area: + if (!XML->sys.Private_L2) { + if (numL2 > 0) { + for (i = 0; i < numL2; i++) { + if (calc_area) { + l2array[i].computeArea(); + } + if (procdynp.homoL2) { + l2.area.set_area(l2.area.get_area() + + l2array[i].area.get_area() * procdynp.numL2); + area.set_area( + area.get_area() + + l2.area.get_area()); // placement and routing overhead is 10%, l2 + // scales worse than cache 40% is accumulated + // from 90 to 22nm + } else { + l2.area.set_area(l2.area.get_area() + l2array[i].area.get_area()); + area.set_area( + area.get_area() + + l2array[i].area.get_area()); // placement and routing overhead is + // 10%, l2 scales worse than cache + // 40% is accumulated from 90 to 22nm + } + } + } + } + + // L3 Area: + if (numL3 > 0) { + for (i = 0; i < numL3; i++) { + 
l3array[i].computeArea(); + if (procdynp.homoL3) { + l3.area.set_area(l3.area.get_area() + + l3array[i].area.get_area() * procdynp.numL3); + area.set_area(area.get_area() + + l3.area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache + // 40% is accumulated from 90 to 22nm + } else { + l3.area.set_area(l3.area.get_area() + l3array[i].area.get_area()); + area.set_area( + area.get_area() + + l3array[i].area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache 40% + // is accumulated from 90 to 22nm + } + } + } + + // L1 Dir Area: + if (numL1Dir > 0) { + for (i = 0; i < numL1Dir; i++) { + l1dirarray[i].computeArea(); + if (procdynp.homoL1Dir) { + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i].area.get_area() * procdynp.numL1Dir); + area.set_area( + area.get_area() + + l1dir.area.get_area()); // placement and routing overhead is 10%, + // l1dir scales worse than cache 40% is + // accumulated from 90 to 22nm + + } else { + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i].area.get_area()); + area.set_area(area.get_area() + l1dirarray[i].area.get_area()); + } + } + } + + // L2 Dir Area: + if (numL2Dir > 0) { + for (i = 0; i < numL2Dir; i++) { + if (calc_area) { + l2dirarray[i].computeArea(); + } + if (procdynp.homoL2Dir) { + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i].area.get_area() * procdynp.numL2Dir); + area.set_area( + area.get_area() + + l2dir.area.get_area()); // placement and routing overhead is 10%, + // l2dir scales worse than cache 40% is + // accumulated from 90 to 22nm + } else { + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i].area.get_area()); + area.set_area(area.get_area() + l2dirarray[i].area.get_area()); + } + } + } + + // MC Calc Area: + if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { + if (calc_area) { + mc.computeArea(); + } + mcs.area.set_area(mcs.area.get_area() + + mc.area.get_area() * 
XML->sys.mc.number_mcs); + area.set_area(area.get_area() + + mc.area.get_area() * XML->sys.mc.number_mcs); + } + + // Flash Controller Area: + if (XML->sys.flashc.number_mcs > 0) // flash controller + { + if (calc_area) { + flashcontroller.computeArea(); + } + double number_fcs = flashcontroller.fcp.num_mcs; + flashcontrollers.area.set_area(flashcontrollers.area.get_area() + + flashcontroller.area.get_area() * + number_fcs); + area.set_area(area.get_area() + flashcontrollers.area.get_area()); + } + + // Network Interface Unit Area + if (XML->sys.niu.number_units > 0) { + if (calc_area) { + niu.computeArea(); + } + nius.area.set_area(nius.area.get_area() + + niu.area.get_area() * XML->sys.niu.number_units); + area.set_area(area.get_area() + + niu.area.get_area() * XML->sys.niu.number_units); + } + + // PCIE Area + if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { + if (calc_area) { + pcie.computeArea(); + } + pcies.area.set_area(pcies.area.get_area() + + pcie.area.get_area() * XML->sys.pcie.number_units); + area.set_area(area.get_area() + + pcie.area.get_area() * XML->sys.pcie.number_units); + } +} + +void Processor::compute_power() { + int i; + double pppm_t[4] = {1, 1, 1, 1}; + + // Compute Core Power + for (i = 0; i < numCore; i++) { + cores[i]->computeEnergy(); + cores[i]->computeEnergy(false); + if (procdynp.homoCore) { set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, @@ -115,20 +454,9 @@ Processor::Processor(ParseXML *XML_interface) procdynp.numCore, procdynp.numCore); core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; - area.set_area(area.get_area() + - core.area.get_area()); // placement and routing overhead is - // 10%, core scales worse than cache - // 40% is accumulated from 90 to 22nm power = power + core.power; rt_power = rt_power + core.rt_power; } else { - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); - area.set_area( - area.get_area() + - cores[i]->area.get_area()); // 
placement and routing overhead is 10%, - // core scales worse than cache 40% is - // accumulated from 90 to 22nm - set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1); core.power = core.power + cores[i]->power * pppm_t; power = power + cores[i]->power * pppm_t; @@ -139,202 +467,138 @@ Processor::Processor(ParseXML *XML_interface) } } + // L2 Calc Power: if (!XML->sys.Private_L2) { if (numL2 > 0) { for (i = 0; i < numL2; i++) { - l2array.push_back(new SharedCache()); - l2array[i]->set_params(XML, i, &interface_ip); - l2array[i]->set_stats(XML); - l2array[i]->computeArea(); - l2array[i]->computeStaticPower(true); - l2array[i]->computeStaticPower(); + l2array[i].computeStaticPower(true); + l2array[i].computeStaticPower(); if (procdynp.homoL2) { - l2.area.set_area(l2.area.get_area() + - l2array[i]->area.get_area() * procdynp.numL2); set_pppm(pppm_t, - l2array[i]->cachep.clockRate * procdynp.numL2, + l2array[i].cachep.clockRate * procdynp.numL2, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.power = l2.power + l2array[i]->power * pppm_t; + l2.power = l2.power + l2array[i].power * pppm_t; set_pppm(pppm_t, - 1 / l2array[i]->cachep.executionTime, + 1 / l2array[i].cachep.executionTime, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; - area.set_area( - area.get_area() + - l2.area.get_area()); // placement and routing overhead is 10%, l2 - // scales worse than cache 40% is accumulated - // from 90 to 22nm + l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; power = power + l2.power; rt_power = rt_power + l2.rt_power; } else { - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); - area.set_area( - area.get_area() + - l2array[i] - ->area.get_area()); // placement and routing overhead is - // 10%, l2 scales worse than cache - // 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t, l2array[i]->cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i]->power * pppm_t; - power = power + 
l2array[i]->power * pppm_t; + set_pppm(pppm_t, l2array[i].cachep.clockRate, 1, 1, 1); + l2.power = l2.power + l2array[i].power * pppm_t; + power = power + l2array[i].power * pppm_t; ; - set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; - rt_power = rt_power + l2array[i]->rt_power * pppm_t; + set_pppm(pppm_t, 1 / l2array[i].cachep.executionTime, 1, 1, 1); + l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; + rt_power = rt_power + l2array[i].rt_power * pppm_t; } } } } + // L3 Power: if (numL3 > 0) { for (i = 0; i < numL3; i++) { - l3array.push_back(new SharedCache()); - l3array[i]->set_params(XML, i, &interface_ip, L3); - l3array[i]->set_stats(XML); - l3array[i]->computeArea(); - l3array[i]->computeStaticPower(true); - l3array[i]->computeStaticPower(); + l3array[i].computeStaticPower(true); + l3array[i].computeStaticPower(); if (procdynp.homoL3) { - l3.area.set_area(l3.area.get_area() + - l3array[i]->area.get_area() * procdynp.numL3); set_pppm(pppm_t, - l3array[i]->cachep.clockRate * procdynp.numL3, + l3array[i].cachep.clockRate * procdynp.numL3, procdynp.numL3, procdynp.numL3, procdynp.numL3); - l3.power = l3.power + l3array[i]->power * pppm_t; + l3.power = l3.power + l3array[i].power * pppm_t; set_pppm(pppm_t, - 1 / l3array[i]->cachep.executionTime, + 1 / l3array[i].cachep.executionTime, procdynp.numL3, procdynp.numL3, procdynp.numL3); - l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; - area.set_area(area.get_area() + - l3.area.get_area()); // placement and routing overhead is - // 10%, l3 scales worse than cache - // 40% is accumulated from 90 to 22nm + l3.rt_power = l3.rt_power + l3array[i].rt_power * pppm_t; power = power + l3.power; rt_power = rt_power + l3.rt_power; - } else { - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); - area.set_area( - area.get_area() + - l3array[i]->area.get_area()); // placement and routing overhead is - // 10%, l3 
scales worse than cache 40% - // is accumulated from 90 to 22nm - set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i]->power * pppm_t; - power = power + l3array[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1); - l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; - rt_power = rt_power + l3array[i]->rt_power * pppm_t; + set_pppm(pppm_t, l3array[i].cachep.clockRate, 1, 1, 1); + l3.power = l3.power + l3array[i].power * pppm_t; + power = power + l3array[i].power * pppm_t; + set_pppm(pppm_t, 1 / l3array[i].cachep.executionTime, 1, 1, 1); + l3.rt_power = l3.rt_power + l3array[i].rt_power * pppm_t; + rt_power = rt_power + l3array[i].rt_power * pppm_t; } } } + + // L1 Dir Power: if (numL1Dir > 0) { for (i = 0; i < numL1Dir; i++) { - l1dirarray.push_back(new SharedCache()); - l1dirarray[i]->set_params(XML, i, &interface_ip, L1Directory); - l1dirarray[i]->set_stats(XML); - l1dirarray[i]->computeArea(); - l1dirarray[i]->computeStaticPower(true); - l1dirarray[i]->computeStaticPower(); + l1dirarray[i].computeStaticPower(true); + l1dirarray[i].computeStaticPower(); if (procdynp.homoL1Dir) { - l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i]->area.get_area() * procdynp.numL1Dir); set_pppm(pppm_t, - l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, + l1dirarray[i].cachep.clockRate * procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; + l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; set_pppm(pppm_t, - 1 / l1dirarray[i]->cachep.executionTime, + 1 / l1dirarray[i].cachep.executionTime, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; - area.set_area( - area.get_area() + - l1dir.area.get_area()); // placement and routing overhead is 10%, - // l1dir scales worse than cache 40% is - // accumulated from 90 to 22nm + 
l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; power = power + l1dir.power; rt_power = rt_power + l1dir.rt_power; } else { - l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); - set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1); - l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; - power = power + l1dirarray[i]->power; - set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; - rt_power = rt_power + l1dirarray[i]->rt_power; + set_pppm(pppm_t, l1dirarray[i].cachep.clockRate, 1, 1, 1); + l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; + power = power + l1dirarray[i].power; + set_pppm(pppm_t, 1 / l1dirarray[i].cachep.executionTime, 1, 1, 1); + l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; + rt_power = rt_power + l1dirarray[i].rt_power; } } } - if (numL2Dir > 0) + + // L2 Dir Power + if (numL2Dir > 0) { for (i = 0; i < numL2Dir; i++) { - l2dirarray.push_back(new SharedCache()); - l2dirarray[i]->set_params(XML, i, &interface_ip, L2Directory); - l2dirarray[i]->set_stats(XML); - l2dirarray[i]->computeArea(); - l2dirarray[i]->computeStaticPower(true); - l2dirarray[i]->computeStaticPower(); + l2dirarray[i].computeStaticPower(true); + l2dirarray[i].computeStaticPower(); if (procdynp.homoL2Dir) { - l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i]->area.get_area() * procdynp.numL2Dir); set_pppm(pppm_t, - l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, + l2dirarray[i].cachep.clockRate * procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; + l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; set_pppm(pppm_t, - 1 / l2dirarray[i]->cachep.executionTime, + 1 / l2dirarray[i].cachep.executionTime, procdynp.numL2Dir, procdynp.numL2Dir, 
procdynp.numL2Dir); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; - area.set_area( - area.get_area() + - l2dir.area.get_area()); // placement and routing overhead is 10%, - // l2dir scales worse than cache 40% is - // accumulated from 90 to 22nm + l2dir.rt_power = l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; power = power + l2dir.power; rt_power = rt_power + l2dir.rt_power; } else { - l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); - set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; - power = power + l2dirarray[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; - rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t; + set_pppm(pppm_t, l2dirarray[i].cachep.clockRate, 1, 1, 1); + l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; + power = power + l2dirarray[i].power * pppm_t; + set_pppm(pppm_t, 1 / l2dirarray[i].cachep.executionTime, 1, 1, 1); + l2dir.rt_power = l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; + rt_power = rt_power + l2dirarray[i].rt_power * pppm_t; } } + } + // MC Calc Power: if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - mc.set_params(XML, &interface_ip, MC); - mc.computeArea(); - mcs.area.set_area(mcs.area.get_area() + - mc.area.get_area() * XML->sys.mc.number_mcs); - area.set_area(area.get_area() + - mc.area.get_area() * XML->sys.mc.number_mcs); - mc.computeStaticPower(); - mc.set_stats(XML); mc.computeDynamicPower(); set_pppm(pppm_t, XML->sys.mc.number_mcs * mc.mcp.clockRate, @@ -352,18 +616,12 @@ Processor::Processor(ParseXML *XML_interface) rt_power = rt_power + mcs.rt_power; } + // Flash Controller Power: if (XML->sys.flashc.number_mcs > 0) // flash controller { - flashcontroller.set_params(XML, 
&interface_ip); - flashcontroller.set_stats(XML); - flashcontroller.computeArea(); flashcontroller.computeStaticPower(); flashcontroller.computeDynamicPower(); double number_fcs = flashcontroller.fcp.num_mcs; - flashcontrollers.area.set_area(flashcontrollers.area.get_area() + - flashcontroller.area.get_area() * - number_fcs); - area.set_area(area.get_area() + flashcontrollers.area.get_area()); set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); flashcontrollers.power = flashcontroller.power * pppm_t; power = power + flashcontrollers.power; @@ -372,21 +630,15 @@ Processor::Processor(ParseXML *XML_interface) rt_power = rt_power + flashcontrollers.rt_power; } + // Network Interface Unit Power if (XML->sys.niu.number_units > 0) { - niu.set_params(XML, &interface_ip); - niu.computeArea(); niu.computeStaticPower(); - nius.area.set_area(nius.area.get_area() + - niu.area.get_area() * XML->sys.niu.number_units); - area.set_area(area.get_area() + - niu.area.get_area() * XML->sys.niu.number_units); + niu.computeDynamicPower(); set_pppm(pppm_t, XML->sys.niu.number_units * niu.niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units); - niu.set_stats(XML); - niu.computeDynamicPower(); nius.power = niu.power * pppm_t; power = power + nius.power; set_pppm(pppm_t, @@ -398,22 +650,15 @@ Processor::Processor(ParseXML *XML_interface) rt_power = rt_power + nius.rt_power; } + // PCIE Power if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - pcie.set_params(XML, &interface_ip); - pcie.computeArea(); - pcies.area.set_area(pcies.area.get_area() + - pcie.area.get_area() * XML->sys.pcie.number_units); - area.set_area(area.get_area() + - pcie.area.get_area() * XML->sys.pcie.number_units); + pcie.computeStaticPower(); + pcie.computeDynamicPower(); set_pppm(pppm_t, XML->sys.pcie.number_units * pcie.pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units); - - 
pcie.set_stats(XML); - pcie.computeStaticPower(); - pcie.computeDynamicPower(); pcies.power = pcie.power * pppm_t; power = power + pcies.power; set_pppm(pppm_t, @@ -424,101 +669,6 @@ Processor::Processor(ParseXML *XML_interface) pcies.rt_power = pcie.rt_power * pppm_t; rt_power = rt_power + pcies.rt_power; } - - if (numNOC > 0) { - for (i = 0; i < numNOC; i++) { - if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used - nocs.push_back(new NoC(XML, i, &interface_ip, 1)); - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } else { // Bus based interconnect - nocs.push_back( - new NoC(XML, - i, - &interface_ip, - 1, - sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - } - - /* - * Compute global links associated with each NOC, if any. 
This must be done - * at the end (even after the NOC router part) since the total chip area - * must be obtain to decide the link routing - */ - for (i = 0; i < numNOC; i++) { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { - nocs[i]->init_link_bus( - sqrt(area.get_area() * - XML->sys.NoC[i].chip_coverage)); // compute global links - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); - area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); - } else { - noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); - area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); - } - } - } - // Compute energy of NoC (w or w/o links) or buses - for (i = 0; i < numNOC; i++) { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); - if (procdynp.homoNOC) { - set_pppm(pppm_t, - procdynp.numNOC * nocs[i]->nocdynp.clockRate, - procdynp.numNOC, - procdynp.numNOC, - procdynp.numNOC); - noc.power = noc.power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, - 1 / nocs[i]->nocdynp.executionTime, - procdynp.numNOC, - procdynp.numNOC, - procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; - power = power + noc.power; - rt_power = rt_power + noc.rt_power; - } else { - set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power * pppm_t; - power = power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; - rt_power = rt_power + nocs[i]->rt_power * pppm_t; - } - } - } - - // //clock power - // globalClock.init_wire_external(is_default, &interface_ip); - // globalClock.clk_area =area*1e6; 
//change it from mm^2 to um^2 - // globalClock.end_wiring_level =5;//toplevel metal - // globalClock.start_wiring_level =5;//toplevel metal - // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local - // final nodes globalClock.optimize_wire(); } void Processor::displayDeviceType(int device_type_, uint32_t indent) { @@ -913,26 +1063,26 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (!XML->sys.Private_L2) { for (i = 0; i < numL2; i++) { - l2array[i]->display(indent + 4, is_tdp); + l2array[i].display(indent + 4, is_tdp); cout << "************************************************************" "*****************************" << endl; } } for (i = 0; i < numL3; i++) { - l3array[i]->display(indent + 4, is_tdp); + l3array[i].display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL1Dir; i++) { - l1dirarray[i]->display(indent + 4, is_tdp); + l1dirarray[i].display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL2Dir; i++) { - l2dirarray[i]->display(indent + 4, is_tdp); + l2dirarray[i].display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1105,24 +1255,8 @@ Processor::~Processor() { delete cores.back(); cores.pop_back(); } - while (!l2array.empty()) { - delete l2array.back(); - l2array.pop_back(); - } - while (!l3array.empty()) { - delete l3array.back(); - l3array.pop_back(); - } while (!nocs.empty()) { delete nocs.back(); nocs.pop_back(); } - while (!l1dirarray.empty()) { - delete l1dirarray.back(); - l1dirarray.pop_back(); - } - while (!l2dirarray.empty()) { - delete l2dirarray.back(); - l2dirarray.pop_back(); - } }; diff --git a/src/processor.h b/src/processor.h index 416d4d4..cd61333 100644 --- a/src/processor.h +++ 
b/src/processor.h @@ -51,12 +51,12 @@ class Processor : public Component { public: - ParseXML *XML; + const ParseXML *XML; vector cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; + vector l2array; + vector l3array; + vector l1dirarray; + vector l2dirarray; vector nocs; MemoryController mc; NIUController niu; @@ -69,13 +69,19 @@ class Processor : public Component { Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies, flashcontrollers; int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface); - void compute(); - void set_proc_param(); + Processor(ParseXML *XML_interface, const bool calc_area = true); + void compute(ParseXML *XML_interface); + void create(const ParseXML *XML_interface, const bool calc_area = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void displayDeviceType(int device_type_, uint32_t indent = 0); void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); ~Processor(); + +private: + void set_proc_param(); + void init(); + void compute_area(const bool calc_area = true); + void compute_power(); }; #endif /* PROCESSOR_H_ */ From 29a6fc1e8b264882d14047b207c176bc4f21dc5f Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Sat, 20 Jun 2020 20:19:19 -0500 Subject: [PATCH 27/59] Fixed error in renaming.cc --- src/core/renaming_unit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index 8a0b292..0da9ccd 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -704,7 +704,7 @@ void RENAMINGU::computeArea(){ iRRAT.area.set_area(iRRAT.area.get_area() + iRRAT.local_result.area); area.set_area(area.get_area() + iRRAT.area.get_area()); - fFRAT.computeArea(); + fRRAT.computeArea(); fRRAT.area.set_area(fRRAT.area.get_area() + fRRAT.local_result.area); area.set_area(area.get_area() + fRRAT.area.get_area()); } From 
fc8c308e7d0308084afd62149348f0b38dc76be2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 20 Jun 2020 20:21:48 -0500 Subject: [PATCH 28/59] rename unit: Preparing to merge with Ram's rename unit code. --- src/core/core.cc | 7 +- src/core/exec_unit.h | 2 +- src/core/renaming_unit.cc | 1201 +++++++++++----------- src/core/renaming_unit.h | 38 +- src/core/scheduler.h | 2 +- src/logic/dep_resource_conflict_check.cc | 1 - src/logic/dff_cell.cc | 1 - src/logic/functional_unit.cc | 117 +-- src/logic/functional_unit.h | 3 +- src/logic/inst_decoder.cc | 1 - src/logic/pipeline.cc | 4 +- src/logic/undiff_core.cc | 48 +- src/memoryctrl/mc_frontend.h | 2 +- src/processor.cc | 700 +++++-------- src/processor.h | 22 +- 15 files changed, 980 insertions(+), 1169 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index 153c33d..f4ad7b1 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -85,7 +85,6 @@ Core::Core(const ParseXML *XML_interface, rnu->set_params(XML, ithCore, &interface_ip, coredynp); rnu->computeArea(); rnu->set_stats(XML); - rnu->computeStaticPower(); } corepipe = new Pipeline(&interface_ip, coredynp); @@ -160,7 +159,7 @@ void Core::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeDynamicPower(is_tdp); + rnu->computeStaticPower(is_tdp); set_pppm( pppm_t, coredynp.num_pipelines / num_units, @@ -250,7 +249,7 @@ void Core::computeEnergy(bool is_tdp) { if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeDynamicPower(is_tdp); + rnu->computeStaticPower(is_tdp); if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; @@ -434,7 +433,7 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << rnu->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - rnu->displayEnergy(indent + 4, plevel, is_tdp); + rnu->display(indent + 4, plevel, is_tdp); } } } diff --git 
a/src/core/exec_unit.h b/src/core/exec_unit.h index 58ec374..fb60c90 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -35,8 +35,8 @@ #include "XML_Parse.h" #include "array.h" #include "basic_components.h" -#include "interconnect.h" #include "functional_unit.h" +#include "interconnect.h" #include "parameter.h" #include "regfile.h" #include "scheduler.h" diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index 27cda1d..619637e 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -44,74 +44,76 @@ #include RENAMINGU::RENAMINGU() { - init_params = false; init_stats = false; + init_params = false; + set_area = false; + long_channel = false; + power_gating = false; + fp_rename_writes = 0; + fp_rename_reads = 0; + rename_writes = 0; + rename_reads = 0; + int_instructions = 0; + fp_instructions = 0; } -void RENAMINGU::set_params(const ParseXML *XML_interface, +/* + * Although renaming logic maybe be used in in-order processors, McPAT + * assumes no renaming logic is used since the performance gain is very + * limited and the only major inorder processor with renaming logic is + * Itainium that is a VLIW processor and different from current McPAT's + * model. physical register base OOO must have Dual-RAT architecture or + * equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; i,f prefix mean int + * and fp RAT for all Renaming logic, random accessible checkpointing is + * used, but only update when instruction retires. FRAT will be read twice + * and written once per instruction; RRAT will be write once per instruction + * when committing and reads out all when context switch + * + * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, + * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, + * + * RAM-based RAT is duplicated/partitioned for each different hardware + * threads CAM-based RAT is shared for all hardware threads With SMT, RAT is + * partitioned and tagged. 
RAM-based RAT needs to have N (N-way SMT) sets of + * entries, with each set for a thread. The RAT control logic will determine + * different sets to use for different threads. But it does not need extra + * tag bits in the entries. However, CAM-based RAT need extra tag bits to + * distinguish the architecture register ids for different threads. + * + * checkpointing of RAT and RRAT are both for architecture state recovery + * with events including mis-speculation; Checkpointing is easier to + * implement in CAM than in RAM based RAT, despite of the inferior scalabilty + * of the CAM-based RATs. McPAT assumes at least 1 checkpoint for CAM-based + * RATs, and no more than 4 checkpoints (based on MIPS designs) for RAM based + * RATs, thus CAM-based RAT does not need RRAT Although no Dual-RAT is needed + * in RS-based OOO processors, since archi RegFile contains the committed + * register values, a RRAT or GC (not both) will speedup the mis-speculation + * recovery. Thus, when RAM-RAT does not have any GC, McPAT assumes the + * existence of a RRAT. + * + * RAM-base RAT does not need to scan/search all contents during instruction + * commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that + * is used for index the RAT entry to be updated. + * + * Both RAM and CAM have same DCL + */ +void RENAMINGU::set_params(const ParseXML *XML, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_) { - /* - * Although renaming logic maybe be used in in-order processors, -* McPAT assumes no renaming logic is used since the performance gain is very -limited and -* the only major inorder processor with renaming logic is Itainium -* that is a VLIW processor and different from current McPAT's model. 
- * physical register base OOO must have Dual-RAT architecture or equivalent -structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but -only update when instruction retires. - * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all -when context switch - * - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * RAM-based RAT is duplicated/partitioned for each different hardware threads - * CAM-based RAT is shared for all hardware threads - * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N -(N-way SMT) sets of entries, with each set for a thread. - * The RAT control logic will determine different sets to use for different -threads. But it does not need extra tag bits in the entries. - * However, CAM-based RAT need extra tag bits to distinguish the architecture -register ids for different threads. - - * - * checkpointing of RAT and RRAT are both for architecture state recovery with -events including mis-speculation; - * Checkpointing is easier to implement in CAM than in RAM based RAT, despite -of the inferior scalabilty of the CAM-based RATs. - * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 -checkpoints (based on MIPS designs) for RAM based RATs, - * thus CAM-based RAT does not need RRAT - * Although no Dual-RAT is needed in RS-based OOO processors, since archi -RegFile contains the committed register values, - * a RRAT or GC (not both) will speedup the mis-speculation recovery. Thus, -when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT. 
- * - * RAM-base RAT does not need to scan/search all contents during instruction -commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is -used for index the RAT entry to be updated. - * - * Both RAM and CAM have same DCL - * - - * - */ - XML = XML_interface; - interface_ip = *interface_ip_; - coredynp = dyn_p_; - ithCore = ithCore_; - + int tag = 0; + int data = 0; + int out_w = 0; exist = exist_; - - if (!exist) + if (!exist_) { return; - int tag, data, out_w; + } + ithCore = ithCore_; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; // interface_ip.wire_is_mat_type = 0; // interface_ip.wire_os_mat_type = 0; // interface_ip.wt = Global_30; @@ -147,11 +149,14 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT.set_params(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + // FRAT floating point data = int(ceil(coredynp.phy_freg_width * (1 + coredynp.globalCheckpoint) / 8.0)); @@ -177,11 +182,14 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT.set_params(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); + } else if (coredynp.rm_ty == CAMbased) { // FRAT tag = coredynp.arch_ireg_width + coredynp.hthread_width; @@ -213,11 +221,14 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT.set_params(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + // FRAT for FP tag = coredynp.arch_freg_width + coredynp.hthread_width; data = int( @@ -248,11 +259,13 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT.set_params(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); } // RRAT is always RAM based, does not have GCs, and is used only for @@ -288,11 +301,14 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT.set_params(&interface_ip, - "Int RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + // RRAT for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); interface_ip.is_cache = false; @@ -317,11 +333,13 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT.set_params(&interface_ip, - "FP RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); } // Freelist of renaming unit always RAM based and needed for RAM-based // RATs. Although it can be implemented within the CAM-based RAT, Current @@ -355,11 +373,14 @@ used for index the RAT entry to be updated. 
// every cycle, (coredynp.decodeW -1) inst may need to send back it dest // tags, committW insts needs to update freelist buffers interface_ip.num_se_rd_ports = 0; - ifreeL.set_params(&interface_ip, - "Int Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + ifreeL = new ArrayST(&interface_ip, + "Int Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + ifreeL->area.set_area(ifreeL->area.get_area() + + ifreeL->local_result.area); + area.set_area(area.get_area() + ifreeL->area.get_area()); // freelist for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); @@ -383,11 +404,14 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ffreeL.set_params(&interface_ip, - "FP Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + ffreeL = new ArrayST(&interface_ip, + "FP Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + ffreeL->area.set_area(ffreeL->area.get_area() + + ffreeL->local_result.area); + area.set_area(area.get_area() + ffreeL->area.get_area()); idcl = new dep_resource_conflict_check( &interface_ip, @@ -424,15 +448,19 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = 2 * coredynp.decodeW; interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; - iFRAT.set_params(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - // iFRAT.local_result.power.readOp.dynamic *= + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->local_result.adjust_area(); + // iFRAT->local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO - // iFRAT.local_result.power.writeOp.dynamic + // iFRAT->local_result.power.writeOp.dynamic //*=1+0.2*0.05;//compensate for GC + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); + // FP data = int(ceil(coredynp.phy_freg_width * (1 + coredynp.globalCheckpoint) / 8.0)); @@ -458,15 +486,18 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = 2 * coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fFRAT.set_params(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - // fFRAT.local_result.power.readOp.dynamic *= + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->local_result.adjust_area(); + // fFRAT->local_result.power.readOp.dynamic *= // 1+0.2*0.05;//1+mis-speculation% TODO - // fFRAT.local_result.power.writeOp.dynamic + // fFRAT->local_result.power.writeOp.dynamic //*=1+0.2*0.05;//compensate for GC + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); } else if (coredynp.rm_ty == CAMbased) { // FRAT @@ -497,11 +528,13 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.decodeW; - iFRAT.set_params(&interface_ip, - "Int FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + iFRAT = new ArrayST(&interface_ip, + "Int FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iFRAT->area.set_area(iFRAT->area.get_area() + iFRAT->local_result.area); + area.set_area(area.get_area() + iFRAT->area.get_area()); // FRAT tag = coredynp.arch_freg_width + coredynp.hthread_width; @@ -532,11 +565,13 @@ used for index the RAT entry to be updated. interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 2 * coredynp.fp_decodeW; - fFRAT.set_params(&interface_ip, - "FP FrontRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + fFRAT = new ArrayST(&interface_ip, + "FP FrontRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fFRAT->area.set_area(fFRAT->area.get_area() + fFRAT->local_result.area); + area.set_area(area.get_area() + fFRAT->area.get_area()); } // Although no RRAT for RS based OOO is really needed since the archiRF // always holds the non-speculative data, having the RRAT or GC (not both) @@ -566,11 +601,14 @@ used for index the RAT entry to be updated. 
interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - iRRAT.set_params(&interface_ip, - "Int RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + iRRAT = new ArrayST(&interface_ip, + "Int RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + iRRAT->area.set_area(iRRAT->area.get_area() + iRRAT->local_result.area); + area.set_area(area.get_area() + iRRAT->area.get_area()); + // RRAT for FP data = int(ceil(coredynp.phy_freg_width / 8.0)); interface_ip.is_cache = false; @@ -595,11 +633,13 @@ used for index the RAT entry to be updated. interface_ip.num_rd_ports = coredynp.fp_decodeW; interface_ip.num_wr_ports = coredynp.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT.set_params(&interface_ip, - "FP RetireRAT", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + fRRAT = new ArrayST(&interface_ip, + "FP RetireRAT", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + fRRAT->area.set_area(fRRAT->area.get_area() + fRRAT->local_result.area); + area.set_area(area.get_area() + fRRAT->area.get_area()); } // Freelist of renaming unit of RS based OOO is unifed for both int and fp @@ -625,13 +665,14 @@ used for index the RAT entry to be updated. 
interface_ip.num_wr_ports = coredynp.decodeW - 1 + XML->sys.core[ithCore].commit_width; interface_ip.num_se_rd_ports = 0; - ifreeL.set_params(&interface_ip, - "Unified Free List", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - // ifreeL.area.set_area(ifreeL.area.get_area()+ - // ifreeL.local_result.area*XML->sys.core[ithCore].number_hardware_threads); + ifreeL = new ArrayST(&interface_ip, + "Unified Free List", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + // ifreeL->area.set_area(ifreeL->area.get_area()+ + // ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); + area.set_area(area.get_area() + ifreeL->area.get_area()); idcl = new dep_resource_conflict_check( &interface_ip, @@ -656,142 +697,37 @@ used for index the RAT entry to be updated. init_params = true; } -void RENAMINGU::computeArea() { +void RENAMINGU::set_stats(const ParseXML* XML) { + fp_rename_writes = XML->sys.core[ithCore].fp_rename_writes; + fp_rename_reads = XML->sys.core[ithCore].fp_rename_reads; + rename_writes = XML->sys.core[ithCore].rename_writes; + rename_reads = XML->sys.core[ithCore].rename_reads; + int_instructions = XML->sys.core[ithCore].int_instructions; + fp_instructions = XML->sys.core[ithCore].fp_instructions; + init_stats = true; +} +void RENAMINGU::computeArea() { if (!init_params) { std::cerr << "[ RENAMINGU ] Error: must set params before calling " "computeArea()\n"; - exit(1); } - if (coredynp.core_ty == OOO) { - if (coredynp.scheu_ty == PhysicalRegFile) { - if (coredynp.rm_ty == - RAMbased) { // FRAT with global checkpointing (GCs) please see paper - // tech report for detailed explanation. 
- iFRAT.computeArea(); - iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); - area.set_area(area.get_area() + iFRAT.area.get_area()); - - fFRAT.computeArea(); - fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); - area.set_area(area.get_area() + fFRAT.area.get_area()); - - } else if (coredynp.rm_ty == CAMbased) { - iFRAT.computeArea(); - iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); - area.set_area(area.get_area() + iFRAT.area.get_area()); - - fFRAT.computeArea(); - fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); - area.set_area(area.get_area() + fFRAT.area.get_area()); - } - - // RRAT is always RAM based, does not have GCs, and is used only for - // record latest non-speculative mapping RRAT is not needed for CAM-based - // RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is - // not needed for RAM-based RAT with checkpoints McPAT assumes renaming - // unit to have RRAT when there is no checkpoints in FRAT, while MIPS - // R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with - // FRAT is very costly, especially for high issue width and high clock - // rate. - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.computeArea(); - iRRAT.area.set_area(iRRAT.area.get_area() + iRRAT.local_result.area); - area.set_area(area.get_area() + iRRAT.area.get_area()); - - fFRAT.computeArea(); - fRRAT.area.set_area(fRRAT.area.get_area() + fRRAT.local_result.area); - area.set_area(area.get_area() + fRRAT.area.get_area()); - } - // Freelist of renaming unit always RAM based and needed for RAM-based - // RATs. Although it can be implemented within the CAM-based RAT, Current - // McPAT does not have the free bits in the CAM but use the same external - // free list as a close approximation for CAM RAT. 
Recycle happens at two - // places: 1)when DCL check there are WAW, the Phy-registers/ROB directly - // recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - // therefore num_wr port = decode-1(-1 means at least one phy reg will be - // used for the current renaming group) + commit width - ifreeL.computeArea(); - ifreeL.area.set_area(ifreeL.area.get_area() + ifreeL.local_result.area); - area.set_area(area.get_area() + ifreeL.area.get_area()); - - ffreeL.computeArea(); - ffreeL.area.set_area(ffreeL.area.get_area() + ffreeL.local_result.area); - area.set_area(area.get_area() + ffreeL.area.get_area()); - - } else if (coredynp.scheu_ty == ReservationStation) { - if (coredynp.rm_ty == RAMbased) { - iFRAT.computeArea(); - iFRAT.local_result.adjust_area(); - // iFRAT.local_result.power.readOp.dynamic *= - // 1+0.2*0.05;//1+mis-speculation% TODO - // iFRAT.local_result.power.writeOp.dynamic - //*=1+0.2*0.05;//compensate for GC - iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); - area.set_area(area.get_area() + iFRAT.area.get_area()); - - fFRAT.computeArea(); - fFRAT.local_result.adjust_area(); - // fFRAT.local_result.power.readOp.dynamic *= - // 1+0.2*0.05;//1+mis-speculation% TODO - // fFRAT.local_result.power.writeOp.dynamic - //*=1+0.2*0.05;//compensate for GC - fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); - area.set_area(area.get_area() + fFRAT.area.get_area()); - - } else if (coredynp.rm_ty == CAMbased) { - // FRAT - iFRAT.computeArea(); - iFRAT.area.set_area(iFRAT.area.get_area() + iFRAT.local_result.area); - area.set_area(area.get_area() + iFRAT.area.get_area()); - - // FRAT - fFRAT.computeArea(); - fFRAT.area.set_area(fFRAT.area.get_area() + fFRAT.local_result.area); - area.set_area(area.get_area() + fFRAT.area.get_area()); - } - // Although no RRAT for RS based OOO is really needed since the archiRF - // always holds the non-speculative data, having the RRAT or GC (not 
both) - // can help the recovery of mis-speculations. - - if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.computeArea(); - iRRAT.area.set_area(iRRAT.area.get_area() + iRRAT.local_result.area); - area.set_area(area.get_area() + iRRAT.area.get_area()); - - // RRAT for FP - fRRAT.computeArea(); - fRRAT.area.set_area(fRRAT.area.get_area() + fRRAT.local_result.area); - area.set_area(area.get_area() + fRRAT.area.get_area()); - } - - // Freelist of renaming unit of RS based OOO is unifed for both int and fp - // renaming unit since the ROB is unified - ifreeL.computeArea(); - // ifreeL.area.set_area(ifreeL.area.get_area()+ - // ifreeL.local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area() + ifreeL.area.get_area()); - } - } -} - -void RENAMINGU::computeStaticPower() { - // NOTE: this does nothing, as the static power is optimized - // along with the array area. + set_area = true; } -void RENAMINGU::set_stats(const ParseXML *XML) { init_stats = true; } - -void RENAMINGU::computeDynamicPower(bool is_tdp) { - if (!exist) +void RENAMINGU::computeStaticPower(bool is_tdp) { + if (!exist) { return; - if (!init_stats) { - std::cerr << "[ RENAMINGU ] Error: must set stats before calling " - "computeDynamicPower()\n"; - + } + if (!init_params) { + std::cerr << "[ RENAMINGU ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } + if (!set_area) { + std::cerr << "[ RENAMINGU ] Error: must computeArea before calling " + "computeStaticPower()\n"; exit(1); } double pppm_t[4] = {1, 1, 1, 1}; @@ -799,78 +735,78 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_rd_ports; - iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; - iFRAT.tdp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + 
iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_rd_ports; - fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; - fFRAT.tdp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_search_ports; - iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; - iFRAT.tdp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_search_ports; - fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; - fFRAT.tdp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.stats_t.readAc.access = iRRAT.l_ip.num_rd_ports; - iRRAT.stats_t.writeAc.access = iRRAT.l_ip.num_wr_ports; - iRRAT.tdp_stats = iRRAT.stats_t; + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; - fRRAT.stats_t.readAc.access = fRRAT.l_ip.num_rd_ports; - fRRAT.stats_t.writeAc.access = fRRAT.l_ip.num_wr_ports; - fRRAT.tdp_stats = fRRAT.stats_t; + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; } - ifreeL.stats_t.readAc.access = - coredynp.decodeW; // ifreeL.l_ip.num_rd_ports;; - ifreeL.stats_t.writeAc.access = - coredynp.decodeW; // ifreeL.l_ip.num_wr_ports; - ifreeL.tdp_stats = ifreeL.stats_t; - - ffreeL.stats_t.readAc.access = - 
coredynp.decodeW; // ffreeL.l_ip.num_rd_ports; - ffreeL.stats_t.writeAc.access = - coredynp.decodeW; // ffreeL.l_ip.num_wr_ports; - ffreeL.tdp_stats = ffreeL.stats_t; + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports;; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; + + ffreeL->stats_t.readAc.access = + coredynp.decodeW; // ffreeL->l_ip.num_rd_ports; + ffreeL->stats_t.writeAc.access = + coredynp.decodeW; // ffreeL->l_ip.num_wr_ports; + ffreeL->tdp_stats = ffreeL->stats_t; } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_rd_ports; - iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; - iFRAT.tdp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_rd_ports; - fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; - fFRAT.tdp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT.stats_t.readAc.access = iFRAT.l_ip.num_search_ports; - iFRAT.stats_t.writeAc.access = iFRAT.l_ip.num_wr_ports; - iFRAT.tdp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = fFRAT.l_ip.num_search_ports; - fFRAT.stats_t.writeAc.access = fFRAT.l_ip.num_wr_ports; - fFRAT.tdp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; + fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; + fFRAT->tdp_stats = fFRAT->stats_t; } if ((coredynp.rm_ty == RAMbased) && 
(coredynp.globalCheckpoint < 1)) { - iRRAT.stats_t.readAc.access = iRRAT.l_ip.num_rd_ports; - iRRAT.stats_t.writeAc.access = iRRAT.l_ip.num_wr_ports; - iRRAT.tdp_stats = iRRAT.stats_t; + iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->tdp_stats = iRRAT->stats_t; - fRRAT.stats_t.readAc.access = fRRAT.l_ip.num_rd_ports; - fRRAT.stats_t.writeAc.access = fRRAT.l_ip.num_wr_ports; - fRRAT.tdp_stats = fRRAT.stats_t; + fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->tdp_stats = fRRAT->stats_t; } // Unified free list for both int and fp - ifreeL.stats_t.readAc.access = - coredynp.decodeW; // ifreeL.l_ip.num_rd_ports; - ifreeL.stats_t.writeAc.access = - coredynp.decodeW; // ifreeL.l_ip.num_wr_ports; - ifreeL.tdp_stats = ifreeL.stats_t; + ifreeL->stats_t.readAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_rd_ports; + ifreeL->stats_t.writeAc.access = + coredynp.decodeW; // ifreeL->l_ip.num_wr_ports; + ifreeL->tdp_stats = ifreeL->stats_t; } idcl->stats_t.readAc.access = coredynp.decodeW; fdcl->stats_t.readAc.access = coredynp.decodeW; @@ -889,114 +825,85 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT.rtp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = rename_reads; + iFRAT->stats_t.writeAc.access = rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT.rtp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fp_rename_reads; + fFRAT->stats_t.writeAc.access = fp_rename_writes; + fFRAT->rtp_stats = 
fFRAT->stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT.rtp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = rename_reads; + iFRAT->stats_t.writeAc.access = rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT.rtp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fp_rename_reads; + fFRAT->stats_t.writeAc.access = fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.stats_t.readAc.access = - XML->sys.core[ithCore] - .rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - iRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT.rtp_stats = iRRAT.stats_t; - - fRRAT.stats_t.readAc.access = - XML->sys.core[ithCore] - .fp_rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - fRRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fRRAT.rtp_stats = fRRAT.stats_t; + // HACK, should be (context switch + branch mispredictions)*16 + iRRAT->stats_t.readAc.access = rename_writes; + iRRAT->stats_t.writeAc.access = rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + // HACK, should be (context switch + branch mispredictions)*16 + fRRAT->stats_t.readAc.access = fp_rename_writes; + fRRAT->stats_t.writeAc.access = fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; } - ifreeL.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL.stats_t.writeAc.access = - 2 * XML->sys.core[ithCore].rename_writes; - ifreeL.rtp_stats = ifreeL.stats_t; + ifreeL->stats_t.readAc.access = rename_reads; + ifreeL->stats_t.writeAc.access = 2*rename_writes; + ifreeL->rtp_stats = ifreeL->stats_t; 
- ffreeL.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL.stats_t.writeAc.access = - 2 * XML->sys.core[ithCore].fp_rename_writes; - ffreeL.rtp_stats = ffreeL.stats_t; + ffreeL->stats_t.readAc.access = fp_rename_reads; + ffreeL->stats_t.writeAc.access = 2*fp_rename_writes; + ffreeL->rtp_stats = ffreeL->stats_t; } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - // iFRAT.stats_t.searchAc.access = - // XML->sys.core[ithCore].committed_int_instructions;//hack: not all - // committed instructions use regs. - iFRAT.rtp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = rename_reads; + iFRAT->stats_t.writeAc.access = rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - // fFRAT.stats_t.searchAc.access = - // XML->sys.core[ithCore].committed_fp_instructions; - fFRAT.rtp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fp_rename_reads; + fFRAT->stats_t.writeAc.access = fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; } else if (coredynp.rm_ty == CAMbased) { - iFRAT.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT.rtp_stats = iFRAT.stats_t; + iFRAT->stats_t.readAc.access = rename_reads; + iFRAT->stats_t.writeAc.access = rename_writes; + iFRAT->rtp_stats = iFRAT->stats_t; - fFRAT.stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fFRAT.rtp_stats = fFRAT.stats_t; + fFRAT->stats_t.readAc.access = fp_rename_reads; + fFRAT->stats_t.writeAc.access = fp_rename_writes; + fFRAT->rtp_stats = fFRAT->stats_t; } if ((coredynp.rm_ty == RAMbased) 
&& (coredynp.globalCheckpoint < 1)) { - iRRAT.stats_t.readAc.access = - XML->sys.core[ithCore] - .rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - iRRAT.stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT.rtp_stats = iRRAT.stats_t; - - fRRAT.stats_t.readAc.access = - XML->sys.core[ithCore] - .fp_rename_writes; // Hack, should be (context switch + branch - // mispredictions)*16 - fRRAT.stats_t.writeAc.access = - XML->sys.core[ithCore].fp_rename_writes; - fRRAT.rtp_stats = fRRAT.stats_t; + // HACK, should be (context switch + branch mispredictions)*16 + iRRAT->stats_t.readAc.access = rename_writes; + iRRAT->stats_t.writeAc.access = rename_writes; + iRRAT->rtp_stats = iRRAT->stats_t; + + // HACK, should be (context switch + branch mispredictions)*16 + fRRAT->stats_t.readAc.access = fp_rename_writes; + fRRAT->stats_t.writeAc.access = fp_rename_writes; + fRRAT->rtp_stats = fRRAT->stats_t; } // Unified free list for both int and fp since the ROB act as physcial // registers - ifreeL.stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL.stats_t.writeAc.access = - 2 * (XML->sys.core[ithCore].rename_writes + - XML->sys.core[ithCore] - .fp_rename_writes); // HACK: 2-> since some of renaming in - // the same group are terminated early - ifreeL.rtp_stats = ifreeL.stats_t; + ifreeL->stats_t.readAc.access = rename_reads+fp_rename_reads; + // HACK: 2-> since some of renaming in the same group are terminated early + ifreeL->stats_t.writeAc.access = 2*(rename_writes+fp_rename_writes); + ifreeL->rtp_stats = ifreeL->stats_t; } - idcl->stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * - XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3 * coredynp.fp_issueW * - coredynp.fp_issueW * - XML->sys.core[ithCore].fp_rename_writes; + idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*rename_reads; + fdcl->stats_t.readAc.access = 
3*coredynp.fp_issueW*coredynp.fp_issueW*fp_rename_writes; idcl->rtp_stats = idcl->stats_t; fdcl->rtp_stats = fdcl->stats_t; } else { if (coredynp.issueW > 1) { - idcl->stats_t.readAc.access = - 2 * XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; + idcl->stats_t.readAc.access = 2*int_instructions; + fdcl->stats_t.readAc.access = fp_instructions; idcl->rtp_stats = idcl->stats_t; fdcl->rtp_stats = fdcl->stats_t; } @@ -1006,122 +913,122 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { if (coredynp.rm_ty == RAMbased) { - iFRAT.power_t.reset(); - fFRAT.power_t.reset(); + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); - iFRAT.power_t.readOp.dynamic += - (iFRAT.stats_t.readAc.access * - (iFRAT.local_result.power.readOp.dynamic + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT.stats_t.writeAc.access * - iFRAT.local_result.power.writeOp.dynamic); - fFRAT.power_t.readOp.dynamic += - (fFRAT.stats_t.readAc.access * - (fFRAT.local_result.power.readOp.dynamic + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT.stats_t.writeAc.access * - fFRAT.local_result.power.writeOp.dynamic); + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { - iFRAT.power_t.reset(); - fFRAT.power_t.reset(); - iFRAT.power_t.readOp.dynamic += - (iFRAT.stats_t.readAc.access * - (iFRAT.local_result.power.searchOp.dynamic + + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) 
+ - iFRAT.stats_t.writeAc.access * - iFRAT.local_result.power.writeOp.dynamic); - fFRAT.power_t.readOp.dynamic += - (fFRAT.stats_t.readAc.access * - (fFRAT.local_result.power.searchOp.dynamic + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT.stats_t.writeAc.access * - fFRAT.local_result.power.writeOp.dynamic); + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.power_t.reset(); - fRRAT.power_t.reset(); - - iRRAT.power_t.readOp.dynamic += - (iRRAT.stats_t.readAc.access * - iRRAT.local_result.power.readOp.dynamic + - iRRAT.stats_t.writeAc.access * - iRRAT.local_result.power.writeOp.dynamic); - fRRAT.power_t.readOp.dynamic += - (fRRAT.stats_t.readAc.access * - fRRAT.local_result.power.readOp.dynamic + - fRRAT.stats_t.writeAc.access * - fRRAT.local_result.power.writeOp.dynamic); + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); } - ifreeL.power_t.reset(); - ffreeL.power_t.reset(); - ifreeL.power_t.readOp.dynamic += - (ifreeL.stats_t.readAc.access * - ifreeL.local_result.power.readOp.dynamic + - ifreeL.stats_t.writeAc.access * - ifreeL.local_result.power.writeOp.dynamic); - ffreeL.power_t.readOp.dynamic += - (ffreeL.stats_t.readAc.access * - ffreeL.local_result.power.readOp.dynamic + - ffreeL.stats_t.writeAc.access * - ffreeL.local_result.power.writeOp.dynamic); + ifreeL->power_t.reset(); + 
ffreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); + ffreeL->power_t.readOp.dynamic += + (ffreeL->stats_t.readAc.access * + ffreeL->local_result.power.readOp.dynamic + + ffreeL->stats_t.writeAc.access * + ffreeL->local_result.power.writeOp.dynamic); } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { - iFRAT.power_t.reset(); - fFRAT.power_t.reset(); + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); - iFRAT.power_t.readOp.dynamic += - (iFRAT.stats_t.readAc.access * - (iFRAT.local_result.power.readOp.dynamic + + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT.stats_t.writeAc.access * - iFRAT.local_result.power.writeOp.dynamic); - fFRAT.power_t.readOp.dynamic += - (fFRAT.stats_t.readAc.access * - (fFRAT.local_result.power.readOp.dynamic + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT.stats_t.writeAc.access * - fFRAT.local_result.power.writeOp.dynamic); + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { - iFRAT.power_t.reset(); - fFRAT.power_t.reset(); - iFRAT.power_t.readOp.dynamic += - (iFRAT.stats_t.readAc.access * - (iFRAT.local_result.power.searchOp.dynamic + + iFRAT->power_t.reset(); + fFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += + (iFRAT->stats_t.readAc.access * + (iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) + - iFRAT.stats_t.writeAc.access * - iFRAT.local_result.power.writeOp.dynamic); - fFRAT.power_t.readOp.dynamic += - (fFRAT.stats_t.readAc.access * - 
(fFRAT.local_result.power.searchOp.dynamic + + iFRAT->stats_t.writeAc.access * + iFRAT->local_result.power.writeOp.dynamic); + fFRAT->power_t.readOp.dynamic += + (fFRAT->stats_t.readAc.access * + (fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) + - fFRAT.stats_t.writeAc.access * - fFRAT.local_result.power.writeOp.dynamic); + fFRAT->stats_t.writeAc.access * + fFRAT->local_result.power.writeOp.dynamic); } if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.power_t.reset(); - fRRAT.power_t.reset(); - - iRRAT.power_t.readOp.dynamic += - (iRRAT.stats_t.readAc.access * - iRRAT.local_result.power.readOp.dynamic + - iRRAT.stats_t.writeAc.access * - iRRAT.local_result.power.writeOp.dynamic); - fRRAT.power_t.readOp.dynamic += - (fRRAT.stats_t.readAc.access * - fRRAT.local_result.power.readOp.dynamic + - fRRAT.stats_t.writeAc.access * - fRRAT.local_result.power.writeOp.dynamic); + iRRAT->power_t.reset(); + fRRAT->power_t.reset(); + + iRRAT->power_t.readOp.dynamic += + (iRRAT->stats_t.readAc.access * + iRRAT->local_result.power.readOp.dynamic + + iRRAT->stats_t.writeAc.access * + iRRAT->local_result.power.writeOp.dynamic); + fRRAT->power_t.readOp.dynamic += + (fRRAT->stats_t.readAc.access * + fRRAT->local_result.power.readOp.dynamic + + fRRAT->stats_t.writeAc.access * + fRRAT->local_result.power.writeOp.dynamic); } - ifreeL.power_t.reset(); - ifreeL.power_t.readOp.dynamic += - (ifreeL.stats_t.readAc.access * - ifreeL.local_result.power.readOp.dynamic + - ifreeL.stats_t.writeAc.access * - ifreeL.local_result.power.writeOp.dynamic); + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + (ifreeL->stats_t.readAc.access * + ifreeL->local_result.power.readOp.dynamic + + ifreeL->stats_t.writeAc.access * + ifreeL->local_result.power.writeOp.dynamic); } } else { @@ -1147,32 +1054,32 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { if (is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { - 
iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; - fFRAT.power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; - ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; - ffreeL.power = ffreeL.power_t + ffreeL.local_result.power; + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->power = ffreeL->power_t + ffreeL->local_result.power; power = power + - (iFRAT.power + fFRAT.power) - //+ (iRRAT.power + fRRAT.power) - + (ifreeL.power + ffreeL.power); + (iFRAT->power + fFRAT->power) + //+ (iRRAT->power + fRRAT->power) + + (ifreeL->power + ffreeL->power); if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.power = iRRAT.power_t + iRRAT.local_result.power; - fRRAT.power = fRRAT.power_t + fRRAT.local_result.power; - power = power + (iRRAT.power + fRRAT.power); + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); } } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; - fFRAT.power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; - ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; - power = power + (iFRAT.power + fFRAT.power) + ifreeL.power; + iFRAT->power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->power = ifreeL->power_t + ifreeL->local_result.power; + power = power + (iFRAT->power + fFRAT->power) + ifreeL->power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.power = iRRAT.power_t + iRRAT.local_result.power; - fRRAT.power = fRRAT.power_t + 
fRRAT.local_result.power; - power = power + (iRRAT.power + fRRAT.power); + iRRAT->power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->power = fRRAT->power_t + fRRAT->local_result.power; + power = power + (iRRAT->power + fRRAT->power); } } } else { @@ -1182,36 +1089,36 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { } else { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT.rt_power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; - fFRAT.rt_power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; - ifreeL.rt_power = ifreeL.power_t + ifreeL.local_result.power; - ffreeL.rt_power = ffreeL.power_t + ffreeL.local_result.power; + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; + ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power; rt_power = rt_power + - (iFRAT.rt_power + fFRAT.rt_power) - // + (iRRAT.rt_power + - // fRRAT.rt_power) - + (ifreeL.rt_power + ffreeL.rt_power); + (iFRAT->rt_power + fFRAT->rt_power) + // + (iRRAT->rt_power + + // fRRAT->rt_power) + + (ifreeL->rt_power + ffreeL->rt_power); if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.rt_power = iRRAT.power_t + iRRAT.local_result.power; - fRRAT.rt_power = fRRAT.power_t + fRRAT.local_result.power; - rt_power = rt_power + (iRRAT.rt_power + fRRAT.rt_power); + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); } } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT.rt_power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; - fFRAT.rt_power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; - ifreeL.rt_power = ifreeL.power_t + 
ifreeL.local_result.power; + iFRAT->rt_power = + iFRAT->power_t + (iFRAT->local_result.power) + idcl->power_t; + fFRAT->rt_power = + fFRAT->power_t + (fFRAT->local_result.power) + fdcl->power_t; + ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power; rt_power = - rt_power + (iFRAT.rt_power + fFRAT.rt_power) + ifreeL.rt_power; + rt_power + (iFRAT->rt_power + fFRAT->rt_power) + ifreeL->rt_power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { - iRRAT.rt_power = iRRAT.power_t + iRRAT.local_result.power; - fRRAT.rt_power = fRRAT.power_t + fRRAT.local_result.power; - rt_power = rt_power + (iRRAT.rt_power + fRRAT.rt_power); + iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power; + fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power; + rt_power = rt_power + (iRRAT->rt_power + fRRAT->rt_power); } } } else { @@ -1220,155 +1127,159 @@ void RENAMINGU::computeDynamicPower(bool is_tdp) { } } -void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { - if (!exist) +void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { + if (!exist) { return; + } string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; if (is_tdp) { if (coredynp.core_ty == OOO) { cout << indent_str << "Int Front End RAT with " << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << iFRAT.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << iFRAT->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << iFRAT.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << iFRAT->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iFRAT.power.readOp.longer_channel_leakage - : iFRAT.power.readOp.leakage) + << (long_channel ? 
iFRAT->power.readOp.longer_channel_leakage + : iFRAT->power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? iFRAT.power.readOp.power_gated_with_long_channel_leakage - : iFRAT.power.readOp.power_gated_leakage) + ? iFRAT->power.readOp.power_gated_with_long_channel_leakage + : iFRAT->power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << iFRAT.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << iFRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; + << iFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "FP Front End RAT with " << coredynp.globalCheckpoint << " internal checkpoints:" << endl; - cout << indent_str_next << "Area = " << fFRAT.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << fFRAT->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << fFRAT.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << fFRAT->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fFRAT.power.readOp.longer_channel_leakage - : fFRAT.power.readOp.leakage) + << (long_channel ? fFRAT->power.readOp.longer_channel_leakage + : fFRAT->power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? fFRAT.power.readOp.power_gated_with_long_channel_leakage - : fFRAT.power.readOp.power_gated_leakage) + ? 
fFRAT->power.readOp.power_gated_with_long_channel_leakage + : fFRAT->power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << fFRAT.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fFRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; + << fFRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Free List:" << endl; - cout << indent_str_next << "Area = " << ifreeL.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ifreeL->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ifreeL.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << ifreeL->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ifreeL.power.readOp.longer_channel_leakage - : ifreeL.power.readOp.leakage) + << (long_channel ? ifreeL->power.readOp.longer_channel_leakage + : ifreeL->power.readOp.leakage) << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? ifreeL.power.readOp.power_gated_with_long_channel_leakage - : ifreeL.power.readOp.power_gated_leakage) - << " W" << endl; + if (power_gating) { + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
ifreeL->power.readOp.power_gated_with_long_channel_leakage + : ifreeL->power.readOp.power_gated_leakage) + << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << ifreeL.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ifreeL.rt_power.readOp.dynamic / executionTime << " W" << endl; + << ifreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { cout << indent_str << "Int Retire RAT: " << endl; - cout << indent_str_next << "Area = " << iRRAT.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << iRRAT->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << iRRAT.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << iRRAT->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? iRRAT.power.readOp.longer_channel_leakage - : iRRAT.power.readOp.leakage) + << (long_channel ? iRRAT->power.readOp.longer_channel_leakage + : iRRAT->power.readOp.leakage) << " W" << endl; - if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? iRRAT.power.readOp.power_gated_with_long_channel_leakage - : iRRAT.power.readOp.power_gated_leakage) - << " W" << endl; + if (power_gating) { + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
iRRAT->power.readOp + .power_gated_with_long_channel_leakage + : iRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << iRRAT.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << iRRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; + << iRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "FP Retire RAT:" << endl; - cout << indent_str_next << "Area = " << fRRAT.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << fRRAT->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << fRRAT.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << fRRAT->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fRRAT.power.readOp.longer_channel_leakage - : fRRAT.power.readOp.leakage) + << (long_channel ? fRRAT->power.readOp.longer_channel_leakage + : fRRAT->power.readOp.leakage) << " W" << endl; - if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? fRRAT.power.readOp.power_gated_with_long_channel_leakage - : fRRAT.power.readOp.power_gated_leakage) - << " W" << endl; + if (power_gating) { + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel ? 
fRRAT->power.readOp + .power_gated_with_long_channel_leakage + : fRRAT->power.readOp.power_gated_leakage) + << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << fRRAT.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fRRAT.rt_power.readOp.dynamic / executionTime << " W" << endl; + << fRRAT->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } if (coredynp.scheu_ty == PhysicalRegFile) { cout << indent_str << "FP Free List:" << endl; - cout << indent_str_next << "Area = " << ffreeL.area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ffreeL->area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ffreeL.power.readOp.dynamic * clockRate + << "Peak Dynamic = " << ffreeL->power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ffreeL.power.readOp.longer_channel_leakage - : ffreeL.power.readOp.leakage) + << (long_channel ? ffreeL->power.readOp.longer_channel_leakage + : ffreeL->power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? ffreeL.power.readOp + << (long_channel ? 
ffreeL->power.readOp .power_gated_with_long_channel_leakage - : ffreeL.power.readOp.power_gated_leakage) + : ffreeL->power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str_next - << "Gate Leakage = " << ffreeL.power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ffreeL.rt_power.readOp.dynamic / executionTime << " W" << endl; + << ffreeL->rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else { @@ -1380,12 +1291,13 @@ void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << (long_channel ? idcl->power.readOp.longer_channel_leakage : idcl->power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? idcl->power.readOp.power_gated_with_long_channel_leakage : idcl->power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl; @@ -1399,12 +1311,13 @@ void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << (long_channel ? fdcl->power.readOp.longer_channel_leakage : fdcl->power.readOp.leakage) << " W" << endl; - if (power_gating) + if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
fdcl->power.readOp.power_gated_with_long_channel_leakage : fdcl->power.readOp.power_gated_leakage) << " W" << endl; + } cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl; @@ -1414,44 +1327,44 @@ void RENAMINGU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } else { if (coredynp.core_ty == OOO) { cout << indent_str_next << "Int Front End RAT Peak Dynamic = " - << iFRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; + << iFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " - << iFRAT.rt_power.readOp.leakage << " W" << endl; + << iFRAT->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Int Front End RAT Gate Leakage = " - << iFRAT.rt_power.readOp.gate_leakage << " W" << endl; + << iFRAT->rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP Front End RAT Peak Dynamic = " - << fFRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; + << fFRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " - << fFRAT.rt_power.readOp.leakage << " W" << endl; + << fFRAT->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Front End RAT Gate Leakage = " - << fFRAT.rt_power.readOp.gate_leakage << " W" << endl; + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Free List Peak Dynamic = " - << ifreeL.rt_power.readOp.dynamic * clockRate << " W" << endl; + << ifreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Free List Subthreshold Leakage = " - << ifreeL.rt_power.readOp.leakage << " W" << endl; + << ifreeL->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Free List Gate Leakage = " - << fFRAT.rt_power.readOp.gate_leakage << " W" << endl; + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; if (coredynp.scheu_ty == 
PhysicalRegFile) { if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { cout << indent_str_next << "Int Retire RAT Peak Dynamic = " - << iRRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; + << iRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " - << iRRAT.rt_power.readOp.leakage << " W" << endl; + << iRRAT->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Int Retire RAT Gate Leakage = " - << iRRAT.rt_power.readOp.gate_leakage << " W" << endl; + << iRRAT->rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP Retire RAT Peak Dynamic = " - << fRRAT.rt_power.readOp.dynamic * clockRate << " W" << endl; + << fRRAT->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " - << fRRAT.rt_power.readOp.leakage << " W" << endl; + << fRRAT->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Retire RAT Gate Leakage = " - << fRRAT.rt_power.readOp.gate_leakage << " W" << endl; + << fRRAT->rt_power.readOp.gate_leakage << " W" << endl; } cout << indent_str_next << "FP Free List Peak Dynamic = " - << ffreeL.rt_power.readOp.dynamic * clockRate << " W" << endl; + << ffreeL->rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP Free List Subthreshold Leakage = " - << ffreeL.rt_power.readOp.leakage << " W" << endl; + << ffreeL->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "FP Free List Gate Leakage = " - << fFRAT.rt_power.readOp.gate_leakage << " W" << endl; + << fFRAT->rt_power.readOp.gate_leakage << " W" << endl; } } else { cout << indent_str_next << "Int DCL Peak Dynamic = " @@ -1476,12 +1389,44 @@ RENAMINGU ::~RENAMINGU() { if (!exist) return; + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (iRRAT) { + delete iRRAT; + iRRAT = 0; + } + if (iFRAT) { + delete iFRAT; + iFRAT = 0; + } + if (ifreeL) { + 
delete ifreeL; + ifreeL = 0; + } if (idcl) { delete idcl; idcl = 0; } + if (fFRAT) { + delete fFRAT; + fFRAT = 0; + } + if (fRRAT) { + delete fRRAT; + fRRAT = 0; + } if (fdcl) { delete fdcl; fdcl = 0; } + if (ffreeL) { + delete ffreeL; + ffreeL = 0; + } + if (RAHT) { + delete RAHT; + RAHT = 0; + } } diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index a8d7f05..ca22eab 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -35,45 +35,53 @@ #include "XML_Parse.h" #include "array.h" #include "basic_components.h" -#include "dep_resource_conflict_check.h" #include "interconnect.h" +#include "dep_resource_conflict_check.h" #include "parameter.h" class RENAMINGU : public Component { public: - const ParseXML *XML; int ithCore; InputParameter interface_ip; double clockRate; double executionTime; CoreDynParam coredynp; - ArrayST iFRAT; - ArrayST fFRAT; - ArrayST iRRAT; - ArrayST fRRAT; - ArrayST ifreeL; - ArrayST ffreeL; + ArrayST *iFRAT; + ArrayST *fFRAT; + ArrayST *iRRAT; + ArrayST *fRRAT; + ArrayST *ifreeL; + ArrayST *ffreeL; dep_resource_conflict_check *idcl; dep_resource_conflict_check *fdcl; - ArrayST RAHT; // register alias history table Used to store GC + ArrayST *RAHT; // register alias history table Used to store GC bool exist; RENAMINGU(); - void set_params(const ParseXML *XML_interface, + void set_params(const ParseXML *XML, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exist_ = true); void set_stats(const ParseXML *XML); void computeArea(); - void computeStaticPower(); - void computeDynamicPower(bool is_tdp); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void computeStaticPower(bool is_tdp = true); + void computeDynamicPower(); // TODO; add this + void display(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~RENAMINGU(); - private: - bool init_params; bool init_stats; + bool init_params; + bool set_area; + bool long_channel; + bool power_gating; + 
+ unsigned int fp_rename_writes; + unsigned int fp_rename_reads; + unsigned int rename_writes; + unsigned int rename_reads; + unsigned int int_instructions; + unsigned int fp_instructions; }; #endif // __RENAMING_U_H__ diff --git a/src/core/scheduler.h b/src/core/scheduler.h index dc7d81c..866cae9 100644 --- a/src/core/scheduler.h +++ b/src/core/scheduler.h @@ -36,8 +36,8 @@ #include "array.h" #include "basic_components.h" #include "interconnect.h" -#include "selection_logic.h" #include "parameter.h" +#include "selection_logic.h" class SchedulerU : public Component { public: diff --git a/src/logic/dep_resource_conflict_check.cc b/src/logic/dep_resource_conflict_check.cc index a7f8c3f..303c0ab 100644 --- a/src/logic/dep_resource_conflict_check.cc +++ b/src/logic/dep_resource_conflict_check.cc @@ -141,4 +141,3 @@ void dep_resource_conflict_check::leakage_feedback(double temperature) { power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; } - diff --git a/src/logic/dff_cell.cc b/src/logic/dff_cell.cc index 8224ceb..df730c8 100644 --- a/src/logic/dff_cell.cc +++ b/src/logic/dff_cell.cc @@ -96,4 +96,3 @@ void DFFCell::compute_DFF_cell() { g_tp.peri_global.Vdd; // printf("leakage =%E\n",cmos_Ileak(1, is_dram) ); } - diff --git a/src/logic/functional_unit.cc b/src/logic/functional_unit.cc index 2ac4f78..c560f0a 100644 --- a/src/logic/functional_unit.cc +++ b/src/logic/functional_unit.cc @@ -36,8 +36,8 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, enum FU_type fu_type_) - : ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), fu_type(fu_type_) { + : ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), + fu_type(fu_type_) { long_channel = false; power_gating = false; @@ -275,7 +275,6 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, } set_stats(XML); - // IEXEU, simple ALU and FPU // double C_ALU, C_EXEU, 
C_FPU; //Lum Equivalent capacitance of IEXEU and // FPU. Based on Intel and Sun 90nm process fabracation. @@ -290,7 +289,7 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, area.set_area(area.get_area() * macro_layout_overhead); } -void FunctionalUnit::set_stats(const ParseXML* XML) { +void FunctionalUnit::set_stats(const ParseXML *XML) { mul_accesses = XML->sys.core[ithCore].mul_accesses; ialu_accesses = XML->sys.core[ithCore].ialu_accesses; fpu_accesses = XML->sys.core[ithCore].fpu_accesses; @@ -370,86 +369,89 @@ void FunctionalUnit::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (is_tdp) { if (fu_type == FPU) { std::cout << indent_str - << "Floating Point Units (FPUs) (Count: " << coredynp.num_fpus - << " ):" << std::endl; - std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + << "Floating Point Units (FPUs) (Count: " << coredynp.num_fpus + << " ):" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" - << std::endl; + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << std::endl; // std::cout << indent_str_next << "Subthreshold Leakage //= " << power.readOp.leakage << " W" << std::endl; std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) { - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; } - std::cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << std::endl; std::cout << indent_str_next - << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime - << " W" << std::endl; + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / executionTime << " W" << std::endl; std::cout << std::endl; } else if (fu_type == ALU) { std::cout << indent_str << "Integer ALUs (Count: " << coredynp.num_alus - << " ):" << std::endl; - std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + << " ):" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" - << std::endl; + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << std::endl; // std::cout << indent_str_next << "Subthreshold Leakage //= " << power.readOp.leakage << " W" << std::endl; std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) { - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; } - std::cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << std::endl; std::cout << indent_str_next - << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime - << " W" << std::endl; + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / executionTime << " W" << std::endl; std::cout << std::endl; } else if (fu_type == MUL) { std::cout << indent_str - << "Complex ALUs (Mul/Div) (Count: " << coredynp.num_muls - << " ):" << std::endl; - std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + << "Complex ALUs (Mul/Div) (Count: " << coredynp.num_muls + << " ):" << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" - << std::endl; + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << std::endl; // std::cout << indent_str_next << "Subthreshold Leakage //= " << power.readOp.leakage << " W" << std::endl; std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) { - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; } - std::cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << std::endl; std::cout << indent_str_next - << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime - << " W" << std::endl; + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << rt_power.readOp.dynamic / executionTime << " W" << std::endl; std::cout << std::endl; } @@ -532,4 +534,3 @@ void FunctionalUnit::leakage_feedback(double temperature) { double pg_reduction = power_gating_leakage_reduction(false); power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; } - diff --git a/src/logic/functional_unit.h b/src/logic/functional_unit.h index caefc36..e569f8b 100644 --- a/src/logic/functional_unit.h +++ b/src/logic/functional_unit.h @@ -72,6 +72,7 @@ class FunctionalUnit : public Component { void computeEnergy(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void leakage_feedback(double temperature); + private: bool long_channel; bool power_gating; @@ -81,8 +82,6 @@ class FunctionalUnit : public Component { unsigned int mul_accesses; unsigned int ialu_accesses; unsigned int fpu_accesses; - - }; #endif // __FUNCTIONAL_UNIT_H__ diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index 49e2f43..a7472df 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -224,4 +224,3 @@ inst_decoder::~inst_decoder() { delete pre_dec->drv2; delete pre_dec; } - diff --git a/src/logic/pipeline.cc b/src/logic/pipeline.cc index 0e544f4..7d9a71a 100644 --- a/src/logic/pipeline.cc +++ b/src/logic/pipeline.cc @@ -30,6 +30,7 @@ ***************************************************************************/ #include "pipeline.h" + #include "dff_cell.h" Pipeline::Pipeline(const InputParameter 
*configure_interface, @@ -239,6 +240,3 @@ void Pipeline::compute_stage_vector() { } } } - - - diff --git a/src/logic/undiff_core.cc b/src/logic/undiff_core.cc index f8176d5..97541cd 100644 --- a/src/logic/undiff_core.cc +++ b/src/logic/undiff_core.cc @@ -161,41 +161,45 @@ void UndiffCore::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (is_tdp) { std::cout << indent_str << "UndiffCore:" << std::endl; - std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" - << std::endl; + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << std::endl; // std::cout << indent_str_next << "Subthreshold Leakage = " << // power.readOp.leakage <<" W" << std::endl; std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << std::endl; - std::cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << std::endl; + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + std::cout << indent_str_next + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; // std::cout << indent_str_next << "Runtime Dynamic = " << // rt_power.readOp.dynamic/executionTime << " W" << std::endl; std::cout << std::endl; } else { std::cout << indent_str << "UndiffCore:" << std::endl; - std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + std::cout << indent_str_next << "Area = " << area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" + << std::endl; std::cout << indent_str_next - << "Peak Dynamic = " << power.readOp.dynamic * clockRate << " W" - << std::endl; - std::cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage - << " W" << std::endl; - std::cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage - << " W" << std::endl; + << "Subthreshold Leakage = " << power.readOp.leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; // std::cout << indent_str_next << "Runtime Dynamic = " << // rt_power.readOp.dynamic/executionTime << " W" << std::endl; std::cout << std::endl; } } - diff --git a/src/memoryctrl/mc_frontend.h b/src/memoryctrl/mc_frontend.h index 2cdffef..c0547c0 100644 --- a/src/memoryctrl/mc_frontend.h +++ b/src/memoryctrl/mc_frontend.h @@ -35,8 +35,8 @@ #include "XML_Parse.h" #include "array.h" #include "basic_components.h" -#include "selection_logic.h" #include "parameter.h" +#include "selection_logic.h" #include diff --git a/src/processor.cc b/src/processor.cc index 6b4a706..ab4992e 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -45,123 +45,15 @@ #include #include -Processor::Processor(ParseXML *XML_interface, const bool calc_area) 
+Processor::Processor(ParseXML *XML_interface) : XML(XML_interface) { // TODO: using one global copy may have problems. /* * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory * controller on chip, thus McPAT only support homogeneous memory controllers. */ - create(XML_interface, calc_area); -} - -void Processor::create(const ParseXML *XML_interface, const bool calc_area) { int i; double pppm_t[4] = {1, 1, 1, 1}; - - this->XML = XML_interface; - init(); - compute_area(calc_area); - compute_power(); - - if (numNOC > 0) { - for (i = 0; i < numNOC; i++) { - if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used - nocs.push_back(new NoC(XML, i, &interface_ip, 1)); - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } else { // Bus based interconnect - nocs.push_back( - new NoC(XML, - i, - &interface_ip, - 1, - sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - } - - /* - * Compute global links associated with each NOC, if any. 
This must be done - * at the end (even after the NOC router part) since the total chip area - * must be obtain to decide the link routing - */ - for (i = 0; i < numNOC; i++) { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { - nocs[i]->init_link_bus( - sqrt(area.get_area() * - XML->sys.NoC[i].chip_coverage)); // compute global links - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); - area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); - } else { - noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); - area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); - } - } - } - // Compute energy of NoC (w or w/o links) or buses - for (i = 0; i < numNOC; i++) { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); - if (procdynp.homoNOC) { - set_pppm(pppm_t, - procdynp.numNOC * nocs[i]->nocdynp.clockRate, - procdynp.numNOC, - procdynp.numNOC, - procdynp.numNOC); - noc.power = noc.power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, - 1 / nocs[i]->nocdynp.executionTime, - procdynp.numNOC, - procdynp.numNOC, - procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; - power = power + noc.power; - rt_power = rt_power + noc.rt_power; - } else { - set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power * pppm_t; - power = power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; - rt_power = rt_power + nocs[i]->rt_power * pppm_t; - } - } - } - - // //clock power - // globalClock.init_wire_external(is_default, &interface_ip); - // globalClock.clk_area =area*1e6; 
//change it from mm^2 to um^2 - // globalClock.end_wiring_level =5;//toplevel metal - // globalClock.start_wiring_level =5;//toplevel metal - // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local - // final nodes globalClock.optimize_wire(); -} - -void Processor::init() { - int i; set_proc_param(); if (procdynp.homoCore) numCore = procdynp.numCore == 0 ? 0 : 1; @@ -174,10 +66,8 @@ void Processor::init() { numL2 = procdynp.numL2; if (XML->sys.Private_L2 && numCore != numL2) { - std::cerr << "[ Processor ] Error: Number of private L2 does not match " - "number of cores" - << endl; - exit(1); + cout << "Number of private L2 does not match number of cores" << endl; + exit(0); } if (procdynp.homoL3) @@ -190,6 +80,12 @@ void Processor::init() { else numNOC = procdynp.numNOC; + // if (!procdynp.homoNOC) + // { + // cout<<"Current McPAT does not support heterogeneous NOC"<sys.Private_L2) { - if (numL2 > 0) { - for (i = 0; i < numL2; i++) { - l2array.push_back(SharedCache()); - l2array[i].set_params(XML, i, &interface_ip); - l2array[i].set_stats(XML); - } - } - } - - // L3: - if (numL3 > 0) { - for (i = 0; i < numL3; i++) { - l3array.push_back(SharedCache()); - l3array[i].set_params(XML, i, &interface_ip, L3); - l3array[i].set_stats(XML); - } - } - - // L1 Dir: - if (numL1Dir > 0) { - for (i = 0; i < numL1Dir; i++) { - l1dirarray.push_back(SharedCache()); - l1dirarray[i].set_params(XML, i, &interface_ip, L1Directory); - l1dirarray[i].set_stats(XML); - } - } - - // L2 Dir: - if (numL2Dir > 0) { - for (i = 0; i < numL2Dir; i++) { - l2dirarray.push_back(SharedCache()); - l2dirarray[i].set_params(XML, i, &interface_ip, L2Directory); - l2dirarray[i].set_stats(XML); - } - } - - // MC: - if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - mc.set_params(XML, &interface_ip, MC); - mc.set_stats(XML); - } - - // Flash Controller Init: - if (XML->sys.flashc.number_mcs > 0) // flash controller - { - flashcontroller.set_params(XML, 
&interface_ip); - flashcontroller.set_stats(XML); - } - - // Network Interface Unit Init - if (XML->sys.niu.number_units > 0) { - niu.set_params(XML, &interface_ip); - niu.set_stats(XML); - } - - // PCIE Init - if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - pcie.set_params(XML, &interface_ip); - pcie.set_stats(XML); - } - - // TODO: Noc Init -} - -void Processor::compute_area(const bool calc_area) { - int i; - double pppm_t[4] = {1, 1, 1, 1}; - - // Compute Area: - for (i = 0; i < numCore; i++) { - if (procdynp.homoCore) { - core.area.set_area(core.area.get_area() + - cores[i]->area.get_area() * procdynp.numCore); - area.set_area(area.get_area() + - core.area.get_area()); // placement and routing overhead is - // 10%, core scales worse than cache - // 40% is accumulated from 90 to 22nm - } else { - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); - area.set_area( - area.get_area() + - cores[i]->area.get_area()); // placement and routing overhead is 10%, - // core scales worse than cache 40% is - // accumulated from 90 to 22nm - } - } - - // L2 Calc Area: - if (!XML->sys.Private_L2) { - if (numL2 > 0) { - for (i = 0; i < numL2; i++) { - if (calc_area) { - l2array[i].computeArea(); - } - if (procdynp.homoL2) { - l2.area.set_area(l2.area.get_area() + - l2array[i].area.get_area() * procdynp.numL2); - area.set_area( - area.get_area() + - l2.area.get_area()); // placement and routing overhead is 10%, l2 - // scales worse than cache 40% is accumulated - // from 90 to 22nm - } else { - l2.area.set_area(l2.area.get_area() + l2array[i].area.get_area()); - area.set_area( - area.get_area() + - l2array[i].area.get_area()); // placement and routing overhead is - // 10%, l2 scales worse than cache - // 40% is accumulated from 90 to 22nm - } - } - } - } - - // L3 Area: - if (numL3 > 0) { - for (i = 0; i < numL3; i++) { - l3array[i].computeArea(); - if (procdynp.homoL3) { - l3.area.set_area(l3.area.get_area() + - 
l3array[i].area.get_area() * procdynp.numL3); - area.set_area(area.get_area() + - l3.area.get_area()); // placement and routing overhead is - // 10%, l3 scales worse than cache - // 40% is accumulated from 90 to 22nm - } else { - l3.area.set_area(l3.area.get_area() + l3array[i].area.get_area()); - area.set_area( - area.get_area() + - l3array[i].area.get_area()); // placement and routing overhead is - // 10%, l3 scales worse than cache 40% - // is accumulated from 90 to 22nm - } - } - } - - // L1 Dir Area: - if (numL1Dir > 0) { - for (i = 0; i < numL1Dir; i++) { - l1dirarray[i].computeArea(); - if (procdynp.homoL1Dir) { - l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i].area.get_area() * procdynp.numL1Dir); - area.set_area( - area.get_area() + - l1dir.area.get_area()); // placement and routing overhead is 10%, - // l1dir scales worse than cache 40% is - // accumulated from 90 to 22nm - - } else { - l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i].area.get_area()); - area.set_area(area.get_area() + l1dirarray[i].area.get_area()); - } - } - } - - // L2 Dir Area: - if (numL2Dir > 0) { - for (i = 0; i < numL2Dir; i++) { - if (calc_area) { - l2dirarray[i].computeArea(); - } - if (procdynp.homoL2Dir) { - l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i].area.get_area() * procdynp.numL2Dir); - area.set_area( - area.get_area() + - l2dir.area.get_area()); // placement and routing overhead is 10%, - // l2dir scales worse than cache 40% is - // accumulated from 90 to 22nm - } else { - l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i].area.get_area()); - area.set_area(area.get_area() + l2dirarray[i].area.get_area()); - } - } - } - - // MC Calc Area: - if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - if (calc_area) { - mc.computeArea(); - } - mcs.area.set_area(mcs.area.get_area() + - mc.area.get_area() * XML->sys.mc.number_mcs); - area.set_area(area.get_area() + - mc.area.get_area() * XML->sys.mc.number_mcs); 
- } - - // Flash Controller Area: - if (XML->sys.flashc.number_mcs > 0) // flash controller - { - if (calc_area) { - flashcontroller.computeArea(); - } - double number_fcs = flashcontroller.fcp.num_mcs; - flashcontrollers.area.set_area(flashcontrollers.area.get_area() + - flashcontroller.area.get_area() * - number_fcs); - area.set_area(area.get_area() + flashcontrollers.area.get_area()); - } - - // Network Interface Unit Area - if (XML->sys.niu.number_units > 0) { - if (calc_area) { - niu.computeArea(); - } - nius.area.set_area(nius.area.get_area() + - niu.area.get_area() * XML->sys.niu.number_units); - area.set_area(area.get_area() + - niu.area.get_area() * XML->sys.niu.number_units); - } - - // PCIE Area - if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - if (calc_area) { - pcie.computeArea(); - } - pcies.area.set_area(pcies.area.get_area() + - pcie.area.get_area() * XML->sys.pcie.number_units); - area.set_area(area.get_area() + - pcie.area.get_area() * XML->sys.pcie.number_units); - } -} - -void Processor::compute_power() { - int i; - double pppm_t[4] = {1, 1, 1, 1}; - - // Compute Core Power - for (i = 0; i < numCore; i++) { cores[i]->computeEnergy(); cores[i]->computeEnergy(false); if (procdynp.homoCore) { + core.area.set_area(core.area.get_area() + + cores[i]->area.get_area() * procdynp.numCore); set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, @@ -454,9 +115,20 @@ void Processor::compute_power() { procdynp.numCore, procdynp.numCore); core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; + area.set_area(area.get_area() + + core.area.get_area()); // placement and routing overhead is + // 10%, core scales worse than cache + // 40% is accumulated from 90 to 22nm power = power + core.power; rt_power = rt_power + core.rt_power; } else { + core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); + area.set_area( + area.get_area() + + cores[i]->area.get_area()); // placement and routing overhead 
is 10%, + // core scales worse than cache 40% is + // accumulated from 90 to 22nm + set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1); core.power = core.power + cores[i]->power * pppm_t; power = power + cores[i]->power * pppm_t; @@ -467,138 +139,202 @@ void Processor::compute_power() { } } - // L2 Calc Power: if (!XML->sys.Private_L2) { if (numL2 > 0) { for (i = 0; i < numL2; i++) { - l2array[i].computeStaticPower(true); - l2array[i].computeStaticPower(); + l2array.push_back(new SharedCache()); + l2array[i]->set_params(XML, i, &interface_ip); + l2array[i]->set_stats(XML); + l2array[i]->computeArea(); + l2array[i]->computeStaticPower(true); + l2array[i]->computeStaticPower(); if (procdynp.homoL2) { + l2.area.set_area(l2.area.get_area() + + l2array[i]->area.get_area() * procdynp.numL2); set_pppm(pppm_t, - l2array[i].cachep.clockRate * procdynp.numL2, + l2array[i]->cachep.clockRate * procdynp.numL2, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.power = l2.power + l2array[i].power * pppm_t; + l2.power = l2.power + l2array[i]->power * pppm_t; set_pppm(pppm_t, - 1 / l2array[i].cachep.executionTime, + 1 / l2array[i]->cachep.executionTime, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; + l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l2.area.get_area()); // placement and routing overhead is 10%, l2 + // scales worse than cache 40% is accumulated + // from 90 to 22nm power = power + l2.power; rt_power = rt_power + l2.rt_power; } else { - set_pppm(pppm_t, l2array[i].cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i].power * pppm_t; - power = power + l2array[i].power * pppm_t; + l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); + area.set_area( + area.get_area() + + l2array[i] + ->area.get_area()); // placement and routing overhead is + // 10%, l2 scales worse than cache + // 40% is accumulated from 90 to 22nm + + set_pppm(pppm_t, 
l2array[i]->cachep.clockRate, 1, 1, 1); + l2.power = l2.power + l2array[i]->power * pppm_t; + power = power + l2array[i]->power * pppm_t; ; - set_pppm(pppm_t, 1 / l2array[i].cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; - rt_power = rt_power + l2array[i].rt_power * pppm_t; + set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1); + l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; + rt_power = rt_power + l2array[i]->rt_power * pppm_t; } } } } - // L3 Power: if (numL3 > 0) { for (i = 0; i < numL3; i++) { - l3array[i].computeStaticPower(true); - l3array[i].computeStaticPower(); + l3array.push_back(new SharedCache()); + l3array[i]->set_params(XML, i, &interface_ip, L3); + l3array[i]->set_stats(XML); + l3array[i]->computeArea(); + l3array[i]->computeStaticPower(true); + l3array[i]->computeStaticPower(); if (procdynp.homoL3) { + l3.area.set_area(l3.area.get_area() + + l3array[i]->area.get_area() * procdynp.numL3); set_pppm(pppm_t, - l3array[i].cachep.clockRate * procdynp.numL3, + l3array[i]->cachep.clockRate * procdynp.numL3, procdynp.numL3, procdynp.numL3, procdynp.numL3); - l3.power = l3.power + l3array[i].power * pppm_t; + l3.power = l3.power + l3array[i]->power * pppm_t; set_pppm(pppm_t, - 1 / l3array[i].cachep.executionTime, + 1 / l3array[i]->cachep.executionTime, procdynp.numL3, procdynp.numL3, procdynp.numL3); - l3.rt_power = l3.rt_power + l3array[i].rt_power * pppm_t; + l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; + area.set_area(area.get_area() + + l3.area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache + // 40% is accumulated from 90 to 22nm power = power + l3.power; rt_power = rt_power + l3.rt_power; + } else { - set_pppm(pppm_t, l3array[i].cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i].power * pppm_t; - power = power + l3array[i].power * pppm_t; - set_pppm(pppm_t, 1 / l3array[i].cachep.executionTime, 1, 1, 1); - l3.rt_power = 
l3.rt_power + l3array[i].rt_power * pppm_t; - rt_power = rt_power + l3array[i].rt_power * pppm_t; + l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); + area.set_area( + area.get_area() + + l3array[i]->area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache 40% + // is accumulated from 90 to 22nm + set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1); + l3.power = l3.power + l3array[i]->power * pppm_t; + power = power + l3array[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1); + l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; + rt_power = rt_power + l3array[i]->rt_power * pppm_t; } } } - - // L1 Dir Power: if (numL1Dir > 0) { for (i = 0; i < numL1Dir; i++) { - l1dirarray[i].computeStaticPower(true); - l1dirarray[i].computeStaticPower(); + l1dirarray.push_back(new SharedCache()); + l1dirarray[i]->set_params(XML, i, &interface_ip, L1Directory); + l1dirarray[i]->set_stats(XML); + l1dirarray[i]->computeArea(); + l1dirarray[i]->computeStaticPower(true); + l1dirarray[i]->computeStaticPower(); if (procdynp.homoL1Dir) { + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i]->area.get_area() * procdynp.numL1Dir); set_pppm(pppm_t, - l1dirarray[i].cachep.clockRate * procdynp.numL1Dir, + l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; + l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; set_pppm(pppm_t, - 1 / l1dirarray[i].cachep.executionTime, + 1 / l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; + l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l1dir.area.get_area()); // placement and routing overhead is 10%, + // l1dir scales worse than cache 40% is + // 
accumulated from 90 to 22nm power = power + l1dir.power; rt_power = rt_power + l1dir.rt_power; } else { - set_pppm(pppm_t, l1dirarray[i].cachep.clockRate, 1, 1, 1); - l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; - power = power + l1dirarray[i].power; - set_pppm(pppm_t, 1 / l1dirarray[i].cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; - rt_power = rt_power + l1dirarray[i].rt_power; + l1dir.area.set_area(l1dir.area.get_area() + + l1dirarray[i]->area.get_area()); + area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); + set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1); + l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; + power = power + l1dirarray[i]->power; + set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1); + l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; + rt_power = rt_power + l1dirarray[i]->rt_power; } } } - - // L2 Dir Power - if (numL2Dir > 0) { + if (numL2Dir > 0) for (i = 0; i < numL2Dir; i++) { - l2dirarray[i].computeStaticPower(true); - l2dirarray[i].computeStaticPower(); + l2dirarray.push_back(new SharedCache()); + l2dirarray[i]->set_params(XML, i, &interface_ip, L2Directory); + l2dirarray[i]->set_stats(XML); + l2dirarray[i]->computeArea(); + l2dirarray[i]->computeStaticPower(true); + l2dirarray[i]->computeStaticPower(); if (procdynp.homoL2Dir) { + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i]->area.get_area() * procdynp.numL2Dir); set_pppm(pppm_t, - l2dirarray[i].cachep.clockRate * procdynp.numL2Dir, + l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; + l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; set_pppm(pppm_t, - 1 / l2dirarray[i].cachep.executionTime, + 1 / l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); - l2dir.rt_power = 
l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; + l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; + area.set_area( + area.get_area() + + l2dir.area.get_area()); // placement and routing overhead is 10%, + // l2dir scales worse than cache 40% is + // accumulated from 90 to 22nm power = power + l2dir.power; rt_power = rt_power + l2dir.rt_power; } else { - set_pppm(pppm_t, l2dirarray[i].cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; - power = power + l2dirarray[i].power * pppm_t; - set_pppm(pppm_t, 1 / l2dirarray[i].cachep.executionTime, 1, 1, 1); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; - rt_power = rt_power + l2dirarray[i].rt_power * pppm_t; + l2dir.area.set_area(l2dir.area.get_area() + + l2dirarray[i]->area.get_area()); + area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); + set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1); + l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; + power = power + l2dirarray[i]->power * pppm_t; + set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1); + l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; + rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t; } } - } - // MC Calc Power: if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { + mc.set_params(XML, &interface_ip, MC); + mc.computeArea(); + mcs.area.set_area(mcs.area.get_area() + + mc.area.get_area() * XML->sys.mc.number_mcs); + area.set_area(area.get_area() + + mc.area.get_area() * XML->sys.mc.number_mcs); + mc.computeStaticPower(); + mc.set_stats(XML); mc.computeDynamicPower(); set_pppm(pppm_t, XML->sys.mc.number_mcs * mc.mcp.clockRate, @@ -616,12 +352,18 @@ void Processor::compute_power() { rt_power = rt_power + mcs.rt_power; } - // Flash Controller Power: if (XML->sys.flashc.number_mcs > 0) // flash controller { + flashcontroller.set_params(XML, &interface_ip); + flashcontroller.set_stats(XML); + 
flashcontroller.computeArea(); flashcontroller.computeStaticPower(); flashcontroller.computeDynamicPower(); double number_fcs = flashcontroller.fcp.num_mcs; + flashcontrollers.area.set_area(flashcontrollers.area.get_area() + + flashcontroller.area.get_area() * + number_fcs); + area.set_area(area.get_area() + flashcontrollers.area.get_area()); set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs); flashcontrollers.power = flashcontroller.power * pppm_t; power = power + flashcontrollers.power; @@ -630,15 +372,21 @@ void Processor::compute_power() { rt_power = rt_power + flashcontrollers.rt_power; } - // Network Interface Unit Power if (XML->sys.niu.number_units > 0) { + niu.set_params(XML, &interface_ip); + niu.computeArea(); niu.computeStaticPower(); - niu.computeDynamicPower(); + nius.area.set_area(nius.area.get_area() + + niu.area.get_area() * XML->sys.niu.number_units); + area.set_area(area.get_area() + + niu.area.get_area() * XML->sys.niu.number_units); set_pppm(pppm_t, XML->sys.niu.number_units * niu.niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units); + niu.set_stats(XML); + niu.computeDynamicPower(); nius.power = niu.power * pppm_t; power = power + nius.power; set_pppm(pppm_t, @@ -650,15 +398,22 @@ void Processor::compute_power() { rt_power = rt_power + nius.rt_power; } - // PCIE Power if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - pcie.computeStaticPower(); - pcie.computeDynamicPower(); + pcie.set_params(XML, &interface_ip); + pcie.computeArea(); + pcies.area.set_area(pcies.area.get_area() + + pcie.area.get_area() * XML->sys.pcie.number_units); + area.set_area(area.get_area() + + pcie.area.get_area() * XML->sys.pcie.number_units); set_pppm(pppm_t, XML->sys.pcie.number_units * pcie.pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units); + + pcie.set_stats(XML); + pcie.computeStaticPower(); + pcie.computeDynamicPower(); pcies.power = 
pcie.power * pppm_t; power = power + pcies.power; set_pppm(pppm_t, @@ -669,6 +424,101 @@ void Processor::compute_power() { pcies.rt_power = pcie.rt_power * pppm_t; rt_power = rt_power + pcies.rt_power; } + + if (numNOC > 0) { + for (i = 0; i < numNOC; i++) { + if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used + nocs.push_back(new NoC(XML, i, &interface_ip, 1)); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } else { // Bus based interconnect + nocs.push_back( + new NoC(XML, + i, + &interface_ip, + 1, + sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->area.get_area() * procdynp.numNOC); + area.set_area(area.get_area() + noc.area.get_area()); + } else { + noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); + area.set_area(area.get_area() + nocs[i]->area.get_area()); + } + } + } + + /* + * Compute global links associated with each NOC, if any. 
This must be done + * at the end (even after the NOC router part) since the total chip area + * must be obtain to decide the link routing + */ + for (i = 0; i < numNOC; i++) { + if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { + nocs[i]->init_link_bus( + sqrt(area.get_area() * + XML->sys.NoC[i].chip_coverage)); // compute global links + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + } else { + noc.area.set_area(noc.area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + area.set_area(area.get_area() + + nocs[i]->link_bus_tot_per_Router.area.get_area() * + nocs[i]->nocdynp.total_nodes); + } + } + } + // Compute energy of NoC (w or w/o links) or buses + for (i = 0; i < numNOC; i++) { + nocs[i]->computeEnergy(); + nocs[i]->computeEnergy(false); + if (procdynp.homoNOC) { + set_pppm(pppm_t, + procdynp.numNOC * nocs[i]->nocdynp.clockRate, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); + noc.power = noc.power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, + 1 / nocs[i]->nocdynp.executionTime, + procdynp.numNOC, + procdynp.numNOC, + procdynp.numNOC); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + power = power + noc.power; + rt_power = rt_power + noc.rt_power; + } else { + set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); + noc.power = noc.power + nocs[i]->power * pppm_t; + power = power + nocs[i]->power * pppm_t; + set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); + noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + rt_power = rt_power + nocs[i]->rt_power * pppm_t; + } + } + } + + // //clock power + // globalClock.init_wire_external(is_default, &interface_ip); + // globalClock.clk_area =area*1e6; 
//change it from mm^2 to um^2 + // globalClock.end_wiring_level =5;//toplevel metal + // globalClock.start_wiring_level =5;//toplevel metal + // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local + // final nodes globalClock.optimize_wire(); } void Processor::displayDeviceType(int device_type_, uint32_t indent) { @@ -1063,26 +913,26 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } if (!XML->sys.Private_L2) { for (i = 0; i < numL2; i++) { - l2array[i].display(indent + 4, is_tdp); + l2array[i]->display(indent + 4, is_tdp); cout << "************************************************************" "*****************************" << endl; } } for (i = 0; i < numL3; i++) { - l3array[i].display(indent + 4, is_tdp); + l3array[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL1Dir; i++) { - l1dirarray[i].display(indent + 4, is_tdp); + l1dirarray[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; } for (i = 0; i < numL2Dir; i++) { - l2dirarray[i].display(indent + 4, is_tdp); + l2dirarray[i]->display(indent + 4, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -1255,8 +1105,24 @@ Processor::~Processor() { delete cores.back(); cores.pop_back(); } + while (!l2array.empty()) { + delete l2array.back(); + l2array.pop_back(); + } + while (!l3array.empty()) { + delete l3array.back(); + l3array.pop_back(); + } while (!nocs.empty()) { delete nocs.back(); nocs.pop_back(); } + while (!l1dirarray.empty()) { + delete l1dirarray.back(); + l1dirarray.pop_back(); + } + while (!l2dirarray.empty()) { + delete l2dirarray.back(); + l2dirarray.pop_back(); + } }; diff --git a/src/processor.h b/src/processor.h index cd61333..416d4d4 100644 --- a/src/processor.h +++ 
b/src/processor.h @@ -51,12 +51,12 @@ class Processor : public Component { public: - const ParseXML *XML; + ParseXML *XML; vector cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; + vector l2array; + vector l3array; + vector l1dirarray; + vector l2dirarray; vector nocs; MemoryController mc; NIUController niu; @@ -69,19 +69,13 @@ class Processor : public Component { Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies, flashcontrollers; int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface, const bool calc_area = true); - void compute(ParseXML *XML_interface); - void create(const ParseXML *XML_interface, const bool calc_area = true); + Processor(ParseXML *XML_interface); + void compute(); + void set_proc_param(); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void displayDeviceType(int device_type_, uint32_t indent = 0); void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); ~Processor(); - -private: - void set_proc_param(); - void init(); - void compute_area(const bool calc_area = true); - void compute_power(); }; #endif /* PROCESSOR_H_ */ From 57a10c61f10b9825d01b624533ad32856a427fdb Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 20 Jun 2020 22:37:55 -0500 Subject: [PATCH 29/59] functional-unit: Split the class up into separate methods. Need to split up area calculations. 
--- src/core/exec_unit.cc | 127 +++++++------- src/core/exec_unit.h | 6 +- src/logic/functional_unit.cc | 323 +++++++++++++++++++---------------- src/logic/functional_unit.h | 26 ++- src/noc.cc | 11 +- src/noc.h | 1 + 6 files changed, 276 insertions(+), 218 deletions(-) diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 358e71e..06e6d6f 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -49,12 +49,12 @@ EXECU::EXECU(const ParseXML *XML_interface, const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), fp_u(0), - exeu(0), mul(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), + lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), intTag_mul_Bypass(0), fp_bypass(0), fpTagBypass(0), exist(exist_) { bool exist_flag = true; - if (!exist) + if (!exist) { return; + } double fu_height = 0.0; clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; @@ -66,18 +66,24 @@ EXECU::EXECU(const ParseXML *XML_interface, scheu->set_params(XML, ithCore, &interface_ip, coredynp); scheu->computeArea(); scheu->set_stats(XML); - exeu = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, ALU); - area.set_area(area.get_area() + exeu->area.get_area() + rfu->area.get_area() + + exeu.set_params(XML, ithCore, &interface_ip, coredynp, ALU); + exeu.set_stats(XML); + exeu.computeArea(); + area.set_area(area.get_area() + exeu.area.get_area() + rfu->area.get_area() + scheu->area.get_area()); - fu_height = exeu->FU_height; + fu_height = exeu.FU_height; if (coredynp.num_fpus > 0) { - fp_u = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, FPU); - area.set_area(area.get_area() + fp_u->area.get_area()); + fp_u.set_params(XML, ithCore, &interface_ip, coredynp, FPU); + fp_u.set_stats(XML); + fp_u.computeArea(); + area.set_area(area.get_area() + fp_u.area.get_area()); } 
if (coredynp.num_muls > 0) { - mul = new FunctionalUnit(XML, ithCore, &interface_ip, coredynp, MUL); - area.set_area(area.get_area() + mul->area.get_area()); - fu_height += mul->FU_height; + mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); + mul.set_stats(XML); + mul.computeArea(); + area.set_area(area.get_area() + mul.area.get_area()); + fu_height += mul.FU_height; } /* * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; @@ -107,7 +113,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, + rfu->int_regfile_height + exeu.FU_height + lsq_height, &interface_ip, 3, false, @@ -120,7 +126,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + + rfu->int_regfile_height + exeu.FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, @@ -138,8 +144,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height, + rfu->fp_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height, &interface_ip, 3, false, @@ -154,8 +160,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height + scheu->Iw_height, + rfu->fp_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, false, @@ -173,7 +179,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + fp_u->FU_height, + rfu->fp_regfile_height + fp_u.FU_height, &interface_ip, 3, false, @@ -186,7 +192,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + 
scheu->Iw_height, &interface_ip, 3, @@ -209,7 +215,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + + rfu->int_regfile_height + exeu.FU_height + lsq_height, &interface_ip, 3, @@ -225,7 +231,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, coredynp.phy_ireg_width, rfu->int_regfile_height + - exeu->FU_height + lsq_height + + exeu.FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -243,8 +249,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + - mul->FU_height + lsq_height, + rfu->int_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height, &interface_ip, 3, false, @@ -257,7 +263,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + rfu->int_regfile_height + exeu.FU_height + mul.FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -277,7 +283,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, + rfu->fp_regfile_height + fp_u.FU_height, &interface_ip, 3, false, @@ -290,7 +296,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -313,7 +319,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + + rfu->int_regfile_height + exeu.FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, @@ -328,7 +334,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, coredynp.phy_ireg_width, rfu->int_regfile_height + - exeu->FU_height + lsq_height + + exeu.FU_height + 
lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -347,7 +353,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + rfu->int_regfile_height + exeu.FU_height + mul.FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -361,7 +367,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + + rfu->int_regfile_height + exeu.FU_height + mul.FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -381,7 +387,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, @@ -396,7 +402,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, @@ -422,17 +428,32 @@ void EXECU::computeEnergy(bool is_tdp) { // rfu->rt_power.reset(); // scheu->power.reset(); // scheu->rt_power.reset(); - // exeu->power.reset(); - // exeu->rt_power.reset(); + // exeu.power.reset(); + // exeu.rt_power.reset(); rfu->computeDynamicPower(is_tdp); scheu->computeDynamicPower(is_tdp); - exeu->computeEnergy(is_tdp); + if(is_tdp) { + exeu.computePower(); + } + else { + exeu.computeRuntimeDynamicPower(); + } if (coredynp.num_fpus > 0) { - fp_u->computeEnergy(is_tdp); + if(is_tdp) { + fp_u.computePower(); + } + else { + fp_u.computeRuntimeDynamicPower(); + } } if (coredynp.num_muls > 0) { - mul->computeEnergy(is_tdp); + if(is_tdp) { + mul.computePower(); + } + else { + mul.computeRuntimeDynamicPower(); + } } if (is_tdp) { @@ -457,7 +478,7 @@ void 
EXECU::computeEnergy(bool is_tdp) { // be passed for each int instruction. bypass.power = bypass.power + intTag_mul_Bypass->power * pppm_t + int_mul_bypass->power * pppm_t; - power = power + mul->power; + power = power + mul.power; } if (coredynp.num_fpus > 0) { set_pppm( @@ -470,10 +491,10 @@ void EXECU::computeEnergy(bool is_tdp) { // to be passed for each fp instruction. bypass.power = bypass.power + fp_bypass->power * pppm_t + fpTagBypass->power * pppm_t; - power = power + fp_u->power; + power = power + fp_u.power; } - power = power + rfu->power + exeu->power + bypass.power + scheu->power; + power = power + rfu->power + exeu.power + bypass.power + scheu->power; } else { set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, @@ -493,7 +514,7 @@ void EXECU::computeEnergy(bool is_tdp) { // be passed for each int instruction. bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power * pppm_t + int_mul_bypass->power * pppm_t; - rt_power = rt_power + mul->rt_power; + rt_power = rt_power + mul.rt_power; } if (coredynp.num_fpus > 0) { @@ -504,9 +525,9 @@ void EXECU::computeEnergy(bool is_tdp) { XML->sys.core[ithCore].cdb_fpu_accesses); bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; - rt_power = rt_power + fp_u->rt_power; + rt_power = rt_power + fp_u.rt_power; } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + + rt_power = rt_power + rfu->rt_power + exeu.rt_power + bypass.rt_power + scheu->rt_power; } } @@ -572,12 +593,12 @@ void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (plevel > 3) { scheu->displayEnergy(indent + 4, is_tdp); } - exeu->displayEnergy(indent, is_tdp); + exeu.display(indent, is_tdp); if (coredynp.num_fpus > 0) { - fp_u->displayEnergy(indent, is_tdp); + fp_u.display(indent, is_tdp); } if (coredynp.num_muls > 0) { - mul->displayEnergy(indent, is_tdp); + mul.display(indent, is_tdp); } cout << indent_str << "Results 
Broadcast Bus:" << endl; cout << indent_str_next @@ -652,18 +673,6 @@ EXECU ::~EXECU() { delete fpTagBypass; fpTagBypass = 0; } - if (fp_u) { - delete fp_u; - fp_u = 0; - } - if (exeu) { - delete exeu; - exeu = 0; - } - if (mul) { - delete mul; - mul = 0; - } if (rfu) { delete rfu; rfu = 0; @@ -672,4 +681,4 @@ EXECU ::~EXECU() { delete scheu; scheu = 0; } -} \ No newline at end of file +} diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index fb60c90..75c40dd 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -55,9 +55,9 @@ class EXECU : public Component { CoreDynParam coredynp; RegFU *rfu; SchedulerU *scheu; - FunctionalUnit *fp_u; - FunctionalUnit *exeu; - FunctionalUnit *mul; + FunctionalUnit fp_u; + FunctionalUnit exeu; + FunctionalUnit mul; interconnect *int_bypass; interconnect *intTagBypass; interconnect *int_mul_bypass; diff --git a/src/logic/functional_unit.cc b/src/logic/functional_unit.cc index c560f0a..29ca683 100644 --- a/src/logic/functional_unit.cc +++ b/src/logic/functional_unit.cc @@ -31,14 +31,10 @@ #include "functional_unit.h" -FunctionalUnit::FunctionalUnit(const ParseXML *XML, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - enum FU_type fu_type_) - : ithCore(ithCore_), interface_ip(*interface_ip_), coredynp(dyn_p_), - fu_type(fu_type_) { - +FunctionalUnit::FunctionalUnit() { + init_params = false; + init_stats = false; + set_area = false; long_channel = false; power_gating = false; embedded = false; @@ -46,11 +42,32 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, ialu_accesses = 0; fpu_accesses = 0; + FU_height = 0.0; + clockRate = 0.0; + executionTime = 0.0; + num_fu = 0.0; + energy = 0.0; + base_energy = 0.0; + per_access_energy = 0.0; + leakage = 0.0; + gate_leakage = 0.0; +} + +void FunctionalUnit::set_params(const ParseXML *XML, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + enum FU_type fu_type_) { + ithCore = ithCore_; + interface_ip = 
*interface_ip_; + coredynp = dyn_p_; + fu_type = fu_type_; + long_channel = XML->sys.longer_channel_device; power_gating = XML->sys.power_gating; embedded = XML->sys.Embedded; - double area_t; //, leakage, gate_leakage; + double area_t = 0.0; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; @@ -269,11 +286,11 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, FU_height = (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data } else { - std::cout << "Unknown Functional Unit Type" << std::endl; - exit(0); + std::cout << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << + std::endl; + exit(1); } } - set_stats(XML); // IEXEU, simple ALU and FPU // double C_ALU, C_EXEU, C_FPU; //Lum Equivalent capacitance of IEXEU and @@ -287,86 +304,104 @@ FunctionalUnit::FunctionalUnit(const ParseXML *XML, gate_leakage *= num_fu; double macro_layout_overhead = g_tp.macro_layout_overhead; area.set_area(area.get_area() * macro_layout_overhead); + init_params = true; + set_stats(XML); } void FunctionalUnit::set_stats(const ParseXML *XML) { mul_accesses = XML->sys.core[ithCore].mul_accesses; ialu_accesses = XML->sys.core[ithCore].ialu_accesses; fpu_accesses = XML->sys.core[ithCore].fpu_accesses; + init_stats = true; } -void FunctionalUnit::computeEnergy(bool is_tdp) { - double pppm_t[4] = {1, 1, 1, 1}; - double FU_duty_cycle = 0.0; - if (is_tdp) { +void FunctionalUnit::computeArea() { - set_pppm(pppm_t, 2, 2, 2, 2); // 2 means two source operands needs to be - // passed for each int instruction. 
- if (fu_type == FPU) { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.FPU_duty_cycle; - } else if (fu_type == ALU) { - stats_t.readAc.access = 1 * num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.ALU_duty_cycle; - } else if (fu_type == MUL) { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.MUL_duty_cycle; - } + set_area = true; +} - // power.readOp.dynamic = base_energy/clockRate + - // energy*stats_t.readAc.access; - power.readOp.dynamic = - per_access_energy * stats_t.readAc.access + base_energy / clockRate; - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation * FU_duty_cycle; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; +void FunctionalUnit::computePower() { + double FU_duty_cycle = 0.0; + if (!init_params) { + std::cerr << "[ FunctionalUnit ] Error: must set params before calling " + "computePower()\n"; + exit(1); + } + if (!set_area) { + std::cerr << "[ FunctionalUnit ] Error: must computeArea before calling " + "computePower()\n"; + exit(1); + } + if (fu_type == FPU) { + stats_t.readAc.access = num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.FPU_duty_cycle; + } else if (fu_type == ALU) { + stats_t.readAc.access = 1 * num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.ALU_duty_cycle; + } else if (fu_type == MUL) { + stats_t.readAc.access = num_fu; + tdp_stats = stats_t; + FU_duty_cycle = coredynp.MUL_duty_cycle; + } - power.readOp.leakage = leakage; - power.readOp.gate_leakage = gate_leakage; - double long_channel_device_reduction = - longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = - power.readOp.leakage * long_channel_device_reduction; + // power.readOp.dynamic = base_energy/clockRate + + // energy*stats_t.readAc.access; + power.readOp.dynamic = + per_access_energy * tdp_stats.readAc.access + base_energy / clockRate; + double sckRation = 
g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation * FU_duty_cycle; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; - power.readOp.power_gated_with_long_channel_leakage = - power.readOp.power_gated_leakage * long_channel_device_reduction; + power.readOp.leakage = leakage; + power.readOp.gate_leakage = gate_leakage; + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; - } else { - if (fu_type == FPU) { - stats_t.readAc.access = fpu_accesses; - rtp_stats = stats_t; - } else if (fu_type == ALU) { - stats_t.readAc.access = ialu_accesses; - rtp_stats = stats_t; - } else if (fu_type == MUL) { - stats_t.readAc.access = mul_accesses; - rtp_stats = stats_t; - } + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; +} - // rt_power.readOp.dynamic = base_energy*executionTime + - // energy*stats_t.readAc.access; - rt_power.readOp.dynamic = - per_access_energy * stats_t.readAc.access + base_energy * executionTime; - double sckRation = g_tp.sckt_co_eff; - rt_power.readOp.dynamic *= sckRation; - rt_power.writeOp.dynamic *= sckRation; - rt_power.searchOp.dynamic *= sckRation; +void FunctionalUnit::computeRuntimeDynamicPower() { + if (!init_stats) { + std::cerr << "[ FunctionalUnit ] Error: must set stats before calling " + "computePower()\n"; + exit(1); + } + if (fu_type == FPU) { + stats_t.readAc.access = fpu_accesses; + rtp_stats = stats_t; + } else if (fu_type == ALU) { + stats_t.readAc.access = ialu_accesses; + rtp_stats = stats_t; + } else if (fu_type == MUL) { 
+ stats_t.readAc.access = mul_accesses; + rtp_stats = stats_t; } + + // rt_power.readOp.dynamic = base_energy*executionTime + + // energy*rtp_stats.readAc.access; + rt_power.readOp.dynamic = + per_access_energy * rtp_stats.readAc.access + base_energy * executionTime; + double sckRation = g_tp.sckt_co_eff; + rt_power.readOp.dynamic *= sckRation; + rt_power.writeOp.dynamic *= sckRation; + rt_power.searchOp.dynamic *= sckRation; } -void FunctionalUnit::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +void FunctionalUnit::display(uint32_t indent, bool enable) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); // std::cout << indent_str_next << "Results Broadcast Bus Area = " << // bypass->area.get_area() *1e-6 << " mm^2" << std::endl; - if (is_tdp) { + if (enable) { if (fu_type == FPU) { std::cout << indent_str << "Floating Point Units (FPUs) (Count: " << coredynp.num_fpus @@ -460,77 +495,79 @@ void FunctionalUnit::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } void FunctionalUnit::leakage_feedback(double temperature) { - // Update the temperature and initialize the global interfaces. 
- interface_ip.temp = (unsigned int)round(temperature / 10.0) * 10; - - uca_org_t init_result = init_interface(&interface_ip); // init_result is dummy - - // This is part of FunctionalUnit() - double area_t, leakage, gate_leakage; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - - if (fu_type == FPU) { - area_t = 4.47 * 1e6 * - (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / - 90.0); // this is um^2 The base number - if (g_ip->F_sz_nm > 90) - area_t = - 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - } else if (fu_type == ALU) { - area_t = 280 * 260 * 2 * num_fu * - g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; - } else if (fu_type == MUL) { - area_t = 280 * 260 * 2 * 3 * num_fu * - g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) 
* - g_tp.peri_global.Vdd / 2; - } else { - std::cout << "Unknown Functional Unit Type" << std::endl; - exit(1); - } - - power.readOp.leakage = leakage * num_fu; - power.readOp.gate_leakage = gate_leakage * num_fu; - power.readOp.longer_channel_leakage = - longer_channel_device_reduction(Core_device, coredynp.core_ty) * - power.readOp.leakage; - - double pg_reduction = power_gating_leakage_reduction(false); - power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; +// // Update the temperature and initialize the global interfaces. +// interface_ip.temp = (unsigned int)round(temperature / 10.0) * 10; +// +// uca_org_t init_result = init_interface(&interface_ip); // init_result is dummy +// +// // This is part of FunctionalUnit() +// double area_t = 0.0; +// double leakage = 0.0; +// double gate_leakage = 0.0; +// double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); +// +// if (fu_type == FPU) { +// area_t = 4.47 * 1e6 * +// (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / +// 90.0); // this is um^2 The base number +// if (g_ip->F_sz_nm > 90) +// area_t = +// 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 +// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * +// cmos_Isub_leakage(5 * g_tp.min_w_nmos_, +// 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; // unit W +// gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * +// cmos_Ig_leakage(5 * g_tp.min_w_nmos_, +// 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; // unit W +// } else if (fu_type == ALU) { +// area_t = 280 * 260 * 2 * num_fu * +// g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl +// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * +// cmos_Isub_leakage(20 * g_tp.min_w_nmos_, +// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; // unit W +// gate_leakage = +// area_t * 
(g_tp.scaling_factor.core_tx_density) * +// cmos_Ig_leakage(20 * g_tp.min_w_nmos_, +// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; +// } else if (fu_type == MUL) { +// area_t = 280 * 260 * 2 * 3 * num_fu * +// g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl +// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * +// cmos_Isub_leakage(20 * g_tp.min_w_nmos_, +// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; // unit W +// gate_leakage = +// area_t * (g_tp.scaling_factor.core_tx_density) * +// cmos_Ig_leakage(20 * g_tp.min_w_nmos_, +// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, +// 1, +// inv) * +// g_tp.peri_global.Vdd / 2; +// } else { +// std::cout << "Unknown Functional Unit Type" << std::endl; +// exit(1); +// } +// +// power.readOp.leakage = leakage * num_fu; +// power.readOp.gate_leakage = gate_leakage * num_fu; +// power.readOp.longer_channel_leakage = +// longer_channel_device_reduction(Core_device, coredynp.core_ty) * +// power.readOp.leakage; +// +// double pg_reduction = power_gating_leakage_reduction(false); +// power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; } diff --git a/src/logic/functional_unit.h b/src/logic/functional_unit.h index e569f8b..6db759e 100644 --- a/src/logic/functional_unit.h +++ b/src/logic/functional_unit.h @@ -55,7 +55,11 @@ class FunctionalUnit : public Component { double FU_height; double clockRate, executionTime; double num_fu; - double energy, base_energy, per_access_energy, leakage, gate_leakage; + double energy; + double base_energy; + double per_access_energy; + double leakage; + double gate_leakage; bool is_default; enum FU_type fu_type; statsDef tdp_stats; @@ -63,17 +67,23 @@ class FunctionalUnit : public Component { statsDef stats_t; powerDef power_t; - FunctionalUnit(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam 
&dyn_p_, - enum FU_type fu_type); + FunctionalUnit(); + void set_params(const ParseXML *XML_interface, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + enum FU_type fu_type); void set_stats(const ParseXML *XML); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void computeArea(); + void computePower(); + void computeRuntimeDynamicPower(); + void display(uint32_t indent = 0, bool enable = true); void leakage_feedback(double temperature); private: + bool init_params; + bool init_stats; + bool set_area; bool long_channel; bool power_gating; bool embedded; diff --git a/src/noc.cc b/src/noc.cc index 1f7831e..fe6a0a5 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -68,11 +68,12 @@ NoC::NoC(const ParseXML *XML_interface, local_result = init_interface(&interface_ip); scktRatio = g_tp.sckt_co_eff; - if (nocdynp.type) { /* - * if NOC compute router, router links must be computed - * separately and called from external since total chip - * area must be known first - */ + if (nocdynp.type) { + /* + * if NOC compute router, router links must be computed + * separately and called from external since total chip + * area must be known first + */ init_router(); } else { init_link_bus(link_len_); // if bus compute bus diff --git a/src/noc.h b/src/noc.h index 10a36c6..3367bc2 100644 --- a/src/noc.h +++ b/src/noc.h @@ -70,6 +70,7 @@ class NoC : public Component { void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void init_link_bus(double link_len_); void init_router(); + // TODO void computeEnergy_link_bus(bool is_tdp = true); void displayEnergy_link_bus(uint32_t indent = 0, int plevel = 100, From 63059dc379dbfca2782740366e820f4ade2aeb83 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 20 Jun 2020 22:56:59 -0500 Subject: [PATCH 30/59] functional-unit: Finished splitting up the compute area & compute power. 
--- src/logic/functional_unit.cc | 450 ++++++++++++++++------------------- src/logic/functional_unit.h | 9 +- 2 files changed, 207 insertions(+), 252 deletions(-) diff --git a/src/logic/functional_unit.cc b/src/logic/functional_unit.cc index 29ca683..4c22987 100644 --- a/src/logic/functional_unit.cc +++ b/src/logic/functional_unit.cc @@ -51,6 +51,8 @@ FunctionalUnit::FunctionalUnit() { per_access_energy = 0.0; leakage = 0.0; gate_leakage = 0.0; + + area_t = 0.0; } void FunctionalUnit::set_params(const ParseXML *XML, @@ -66,15 +68,19 @@ void FunctionalUnit::set_params(const ParseXML *XML, long_channel = XML->sys.longer_channel_device; power_gating = XML->sys.power_gating; embedded = XML->sys.Embedded; - - double area_t = 0.0; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; + init_params = true; +} + +void FunctionalUnit::set_stats(const ParseXML *XML) { + mul_accesses = XML->sys.core[ithCore].mul_accesses; + ialu_accesses = XML->sys.core[ithCore].ialu_accesses; + fpu_accesses = XML->sys.core[ithCore].fpu_accesses; + init_stats = true; +} - // XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); +void FunctionalUnit::computeArea() { if (embedded) { if (fu_type == FPU) { num_fu = coredynp.num_fpus; @@ -85,36 +91,10 @@ void FunctionalUnit::set_params(const ParseXML *XML, 90.0); // this is um^2 The base number // 4.47 contains both VFP and NEON processing unit, VFP is about 40% and // NEON is about 60% - if (g_ip->F_sz_nm > 90) + if (g_ip->F_sz_nm > 90) { area_t = 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(5 * g_tp.min_w_nmos_, - 5 * 
g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction - // in FPU usually it can have up to 20 cycles. - // base_energy = coredynp.core_ty==Inorder? 0: - // 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and - // 773Mhz (Wattch) base_energy - //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = - 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - // //This is per Hz energy(nJ) - // FPU power from Sandia's processor sizing tech report + } FU_height = (18667 * num_fu) * interface_ip.F_sz_um; // FPU from Sun's // data } else if (fu_type == ALU) { @@ -122,30 +102,6 @@ void FunctionalUnit::set_params(const ParseXML *XML, area_t = 280 * 260 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; - // base_energy = coredynp.core_ty==Inorder? 
- // 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and - // 773Mhz (Wattch) base_energy - //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = - 1.15 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - ////This is per cycle energy(nJ) FU_height = (6222 * num_fu) * interface_ip.F_sz_um; // integer ALU } else if (fu_type == MUL) { @@ -153,38 +109,13 @@ void FunctionalUnit::set_params(const ParseXML *XML, area_t = 280 * 260 * 3 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; - // base_energy = coredynp.core_ty==Inorder? 
- // 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and - // 773Mhz (Wattch) base_energy - //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - base_energy = 0; - per_access_energy = - 1.15 * 2 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - ////This is per cycle energy(nJ), coefficient based on Wattch FU_height = (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data } else { - std::cout << "Unknown Functional Unit Type" << std::endl; + std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << + std::endl; exit(0); } - per_access_energy *= 0.5; // According to ARM data embedded processor has - // much lower per acc energy } else { if (fu_type == FPU) { num_fu = coredynp.num_fpus; @@ -192,35 +123,10 @@ void FunctionalUnit::set_params(const ParseXML *XML, // um^2 area_t = 8.47 * 1e6 * (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / 90.0); // this is um^2 - if (g_ip->F_sz_nm > 90) + if (g_ip->F_sz_nm > 90) { area_t = 8.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(5 * g_tp.min_w_nmos_, - 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction - // in FPU usually it can have up to 20 cycles. - base_energy = coredynp.core_ty == Inorder - ? 
0 - : 89e-3 * 3; // W The base energy of ALU average numbers - // from Intel 4G and 773Mhz (Wattch) - base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); - per_access_energy = - 1.15 * 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - // //This is per op energy(nJ) + } FU_height = (38667 * num_fu) * interface_ip.F_sz_um; // FPU from Sun's // data } else if (fu_type == ALU) { @@ -228,30 +134,6 @@ void FunctionalUnit::set_params(const ParseXML *XML, area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; - base_energy = coredynp.core_ty == Inorder - ? 
0 - : 89e-3; // W The base energy of ALU average numbers - // from Intel 4G and 773Mhz (Wattch) - base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); - per_access_energy = - 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - ////This is per cycle energy(nJ) FU_height = (6222 * num_fu) * interface_ip.F_sz_um; // integer ALU } else if (fu_type == MUL) { @@ -259,30 +141,6 @@ void FunctionalUnit::set_params(const ParseXML *XML, area_t = 280 * 260 * 2 * 3 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl - leakage = area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Isub_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; // unit W - gate_leakage = - area_t * (g_tp.scaling_factor.core_tx_density) * - cmos_Ig_leakage(20 * g_tp.min_w_nmos_, - 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, - 1, - inv) * - g_tp.peri_global.Vdd / 2; - base_energy = coredynp.core_ty == Inorder - ? 
0 - : 89e-3 * 2; // W The base energy of ALU average numbers - // from Intel 4G and 773Mhz (Wattch) - base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); - per_access_energy = - 1.15 * 2 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * - g_tp.peri_global.Vdd * - (g_ip->F_sz_nm / - 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; - ////This is per cycle energy(nJ), coefficient based on Wattch FU_height = (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data } else { @@ -300,23 +158,8 @@ void FunctionalUnit::set_params(const ParseXML *XML, // C_EXEU = 0.05e-9; //F // C_FPU = 0.35e-9;//F area.set_area(area_t * num_fu); - leakage *= num_fu; - gate_leakage *= num_fu; double macro_layout_overhead = g_tp.macro_layout_overhead; area.set_area(area.get_area() * macro_layout_overhead); - init_params = true; - set_stats(XML); -} - -void FunctionalUnit::set_stats(const ParseXML *XML) { - mul_accesses = XML->sys.core[ithCore].mul_accesses; - ialu_accesses = XML->sys.core[ithCore].ialu_accesses; - fpu_accesses = XML->sys.core[ithCore].fpu_accesses; - init_stats = true; -} - -void FunctionalUnit::computeArea() { - set_area = true; } @@ -332,6 +175,7 @@ void FunctionalUnit::computePower() { "computePower()\n"; exit(1); } + computeLeakage(); if (fu_type == FPU) { stats_t.readAc.access = num_fu; tdp_stats = stats_t; @@ -494,80 +338,184 @@ void FunctionalUnit::display(uint32_t indent, bool enable) { } } -void FunctionalUnit::leakage_feedback(double temperature) { -// // Update the temperature and initialize the global interfaces. 
-// interface_ip.temp = (unsigned int)round(temperature / 10.0) * 10; -// -// uca_org_t init_result = init_interface(&interface_ip); // init_result is dummy -// -// // This is part of FunctionalUnit() -// double area_t = 0.0; -// double leakage = 0.0; -// double gate_leakage = 0.0; -// double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); -// -// if (fu_type == FPU) { -// area_t = 4.47 * 1e6 * -// (g_ip->F_sz_nm * g_ip->F_sz_nm / 90.0 / -// 90.0); // this is um^2 The base number -// if (g_ip->F_sz_nm > 90) -// area_t = -// 4.47 * 1e6 * g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 -// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Isub_leakage(5 * g_tp.min_w_nmos_, -// 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; // unit W -// gate_leakage = area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Ig_leakage(5 * g_tp.min_w_nmos_, -// 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; // unit W -// } else if (fu_type == ALU) { -// area_t = 280 * 260 * 2 * num_fu * -// g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl -// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Isub_leakage(20 * g_tp.min_w_nmos_, -// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; // unit W -// gate_leakage = -// area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Ig_leakage(20 * g_tp.min_w_nmos_, -// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; -// } else if (fu_type == MUL) { -// area_t = 280 * 260 * 2 * 3 * num_fu * -// g_tp.scaling_factor.logic_scaling_co_eff; // this is um^2 ALU + MUl -// leakage = area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Isub_leakage(20 * g_tp.min_w_nmos_, -// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; // unit W -// gate_leakage = -// 
area_t * (g_tp.scaling_factor.core_tx_density) * -// cmos_Ig_leakage(20 * g_tp.min_w_nmos_, -// 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, -// 1, -// inv) * -// g_tp.peri_global.Vdd / 2; -// } else { -// std::cout << "Unknown Functional Unit Type" << std::endl; -// exit(1); -// } -// -// power.readOp.leakage = leakage * num_fu; -// power.readOp.gate_leakage = gate_leakage * num_fu; -// power.readOp.longer_channel_leakage = -// longer_channel_device_reduction(Core_device, coredynp.core_ty) * -// power.readOp.leakage; -// -// double pg_reduction = power_gating_leakage_reduction(false); -// power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; +void FunctionalUnit::computeLeakage() { + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + if (embedded) { + if (fu_type == FPU) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction + // in FPU usually it can have up to 20 cycles. + // base_energy = coredynp.core_ty==Inorder? 
0: + // 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + // //This is per Hz energy(nJ) + } else if (fu_type == ALU) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; + // base_energy = coredynp.core_ty==Inorder? + // 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ) + } else if (fu_type == MUL) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; + // base_energy = coredynp.core_ty==Inorder? 
+ // 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and + // 773Mhz (Wattch) base_energy + //*=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); + base_energy = 0; + per_access_energy = + 1.15 * 2 / 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ), coefficient based on Wattch + } else { + std::cerr << "[ FunctionalUnit ]Unknown Functional Unit Type" << std::endl; + exit(0); + } + per_access_energy *= 0.5; // According to ARM data embedded processor has + // much lower per acc energy + } else { + if (fu_type == FPU) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(5 * g_tp.min_w_nmos_, + 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + // energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction + // in FPU usually it can have up to 20 cycles. + base_energy = coredynp.core_ty == Inorder + ? 
0 + : 89e-3 * 3; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 * 3 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); // g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + // //This is per op energy(nJ) + } else if (fu_type == ALU) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; + base_energy = coredynp.core_ty == Inorder + ? 0 + : 89e-3; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ) + } else if (fu_type == MUL) { + leakage = area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; // unit W + gate_leakage = + area_t * (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(20 * g_tp.min_w_nmos_, + 20 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, + 1, + inv) * + g_tp.peri_global.Vdd / 2; + base_energy = coredynp.core_ty == Inorder + ? 
0 + : 89e-3 * 2; // W The base energy of ALU average numbers + // from Intel 4G and 773Mhz (Wattch) + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / 1.2); + per_access_energy = + 1.15 * 2 / 1e9 / 4 / 1.3 / 1.3 * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (g_ip->F_sz_nm / + 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; + ////This is per cycle energy(nJ), coefficient based on Wattch + } else { + std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << + std::endl; + exit(1); + } + } + + // IEXEU, simple ALU and FPU + // double C_ALU, C_EXEU, C_FPU; //Lum Equivalent capacitance of IEXEU and + // FPU. Based on Intel and Sun 90nm process fabracation. + // + // C_ALU = 0.025e-9;//F + // C_EXEU = 0.05e-9; //F + // C_FPU = 0.35e-9;//F + leakage *= num_fu; + gate_leakage *= num_fu; } diff --git a/src/logic/functional_unit.h b/src/logic/functional_unit.h index 6db759e..13eeef9 100644 --- a/src/logic/functional_unit.h +++ b/src/logic/functional_unit.h @@ -53,7 +53,8 @@ class FunctionalUnit : public Component { InputParameter interface_ip; CoreDynParam coredynp; double FU_height; - double clockRate, executionTime; + double clockRate; + double executionTime; double num_fu; double energy; double base_energy; @@ -88,10 +89,16 @@ class FunctionalUnit : public Component { bool power_gating; bool embedded; + // Power: + double area_t; + // Stats: unsigned int mul_accesses; unsigned int ialu_accesses; unsigned int fpu_accesses; + + // Private Methods: + void computeLeakage(); }; #endif // __FUNCTIONAL_UNIT_H__ From 9b5c6448e7235cc47157941c41d714971d5d4e48 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 21 Jun 2020 00:15:44 -0500 Subject: [PATCH 31/59] refactor: NoC updated NoC now has the updated interface for serialization --- src/cacti/router.cc | 66 ++++++++ src/cacti/router.h | 9 ++ src/noc.cc | 384 +++++++++++++++++++++++--------------------- src/noc.h | 40 +++-- src/processor.cc | 20 +-- 5 files 
changed, 317 insertions(+), 202 deletions(-) diff --git a/src/cacti/router.cc b/src/cacti/router.cc index 7005f83..e3401ba 100644 --- a/src/cacti/router.cc +++ b/src/cacti/router.cc @@ -31,6 +31,34 @@ #include "router.h" +Router::Router() { + flit_size = 0.0; + deviceType = nullptr; + I = 0.0; + O = 0.0; + M = 0.0; + + vc_buffer_size = 0; + vc_count = 0; + min_w_pmos = 0; + double technology = 0; + + Vdd = 0.0; + + /*Crossbar parameters. Transmisson gate is employed for connector*/ + NTtr = 0.0; + PTtr = 0.0; + wt = 0.0; + ht = 0.0; + NTi = 0.0; + PTi = 0.0; + + NTid = 0.0; + PTid = 0.0; + NTod = 0.0; + PTod = 0.0; +} + Router::Router(double flit_size_, double vc_buf, /* vc size = vc_buffer_size * flit_size */ double vc_c, @@ -64,6 +92,44 @@ Router::Router(double flit_size_, calc_router_parameters(); } +void Router::init(double flit_size_, + double vc_buf, /* vc size = vc_buffer_size * flit_size */ + double vc_c, + TechnologyParameter::DeviceType *dt, + double I_, + double O_, + double M_) { + flit_size = flit_size_; + deviceType = dt; + I = I_; + O = O_; + M = M_; + + vc_buffer_size = vc_buf; + vc_count = vc_c; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + double technology = g_ip->F_sz_um; + + Vdd = dt->Vdd; + + /*Crossbar parameters. Transmisson gate is employed for connector*/ + NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. 
length*/ + wt = 15 * technology * 1e-6 / 2; /*track width*/ + ht = 15 * technology * 1e-6 / 2; /*track height*/ + // I = 5; /*Number of crossbar input ports*/ + // O = 5; /*Number of crossbar output ports*/ + NTi = 12.5 * technology * 1e-6 / 2; + PTi = 25 * technology * 1e-6 / 2; + + NTid = 60 * technology * 1e-6 / 2; // m + PTid = 120 * technology * 1e-6 / 2; // m + NTod = 60 * technology * 1e-6 / 2; // m + PTod = 120 * technology * 1e-6 / 2; // m + + calc_router_parameters(); +} + Router::~Router() {} double // wire cap with triple spacing diff --git a/src/cacti/router.h b/src/cacti/router.h index a97c3b4..fb6f1fa 100644 --- a/src/cacti/router.h +++ b/src/cacti/router.h @@ -46,6 +46,7 @@ class Router : public Component { public: + Router(); Router(double flit_size_, double vc_buf, /* vc size = vc_buffer_size * flit_size */ double vc_count, @@ -55,6 +56,14 @@ class Router : public Component { double M_ = 0.6); ~Router(); + void init(double flit_size_, + double vc_buf, /* vc size = vc_buffer_size * flit_size */ + double vc_count, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global), + double I_ = 5, + double O_ = 5, + double M_ = 0.6); + void print_router(); Component arbiter, crossbar, buffer; diff --git a/src/noc.cc b/src/noc.cc index fe6a0a5..37d4a1b 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -43,19 +43,44 @@ #include #include -NoC::NoC(const ParseXML *XML_interface, - int ithNoC_, - InputParameter *interface_ip_, - double M_traffic_pattern_, - double link_len_) - : XML(XML_interface), ithNoC(ithNoC_), interface_ip(*interface_ip_), - router(0), link_bus(0), link_bus_exist(false), router_exist(false), - M_traffic_pattern(M_traffic_pattern_) { +NoC::NoC() { + embedded = false; + init_stats = false; + init_params = false; + set_area = false; + router_exist = false; + link_bus_exist = false; + link_bus = nullptr; + M_traffic_pattern = 0.0; + link_len = 0.0; + ithNoC = 0; + executionTime = 0.0; + scktRatio = 0.0; + chip_PR_overhead = 0.0; + macro_PR_overhead 
= 0.0; + long_channel = false; + power_gating = false; + total_accesses = 0; +} + +void NoC::set_params(const ParseXML *XML, + int ithNoC_, + InputParameter *interface_ip_, + double M_traffic_pattern_, + double link_len_) { /* * initialize, compute and optimize individual components. */ - - if (XML->sys.Embedded) { + interface_ip = *interface_ip_; + link_len = link_len_; + M_traffic_pattern = M_traffic_pattern_; + ithNoC = ithNoC_; + embedded = XML->sys.Embedded; + long_channel = XML->sys.longer_channel_device; + power_gating = XML->sys.power_gating; + total_accesses = XML->sys.NoC[ithNoC].total_accesses; + + if (embedded) { interface_ip.wt = Global_30; interface_ip.wire_is_mat_type = 0; interface_ip.wire_os_mat_type = 1; @@ -64,42 +89,25 @@ NoC::NoC(const ParseXML *XML_interface, interface_ip.wire_is_mat_type = 2; interface_ip.wire_os_mat_type = 2; } - set_noc_param(); + set_noc_param(XML); local_result = init_interface(&interface_ip); scktRatio = g_tp.sckt_co_eff; - if (nocdynp.type) { - /* - * if NOC compute router, router links must be computed - * separately and called from external since total chip - * area must be known first - */ - init_router(); - } else { - init_link_bus(link_len_); // if bus compute bus - } - - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
- // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); + init_params = true; } void NoC::init_router() { - router = new Router(nocdynp.flit_size, - nocdynp.virtual_channel_per_port * - nocdynp.input_buffer_entries_per_vc, - nocdynp.virtual_channel_per_port, - &(g_tp.peri_global), - nocdynp.input_ports, - nocdynp.output_ports, - M_traffic_pattern); - // router->print_router(); + router.init(nocdynp.flit_size, + nocdynp.virtual_channel_per_port * + nocdynp.input_buffer_entries_per_vc, + nocdynp.virtual_channel_per_port, + &(g_tp.peri_global), + nocdynp.input_ports, + nocdynp.output_ports, + M_traffic_pattern); + // router.print_router(); area.set_area(area.get_area() + - router->area.get_area() * nocdynp.total_nodes); + router.area.get_area() * nocdynp.total_nodes); double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); @@ -108,54 +116,55 @@ void NoC::init_router() { double pg_reduction_nonretain = power_gating_leakage_reduction( false); // non-state-retaining array structure; - router->power.readOp.longer_channel_leakage = - router->power.readOp.leakage * long_channel_device_reduction; - router->buffer.power.readOp.longer_channel_leakage = - router->buffer.power.readOp.leakage * long_channel_device_reduction; - router->crossbar.power.readOp.longer_channel_leakage = - router->crossbar.power.readOp.leakage * long_channel_device_reduction; - router->arbiter.power.readOp.longer_channel_leakage = - router->arbiter.power.readOp.leakage * long_channel_device_reduction; - - router->buffer.power.readOp.power_gated_leakage = - router->buffer.power.readOp.leakage * + router.power.readOp.longer_channel_leakage = + router.power.readOp.leakage * long_channel_device_reduction; + router.buffer.power.readOp.longer_channel_leakage = + router.buffer.power.readOp.leakage * 
long_channel_device_reduction; + router.crossbar.power.readOp.longer_channel_leakage = + router.crossbar.power.readOp.leakage * long_channel_device_reduction; + router.arbiter.power.readOp.longer_channel_leakage = + router.arbiter.power.readOp.leakage * long_channel_device_reduction; + + router.buffer.power.readOp.power_gated_leakage = + router.buffer.power.readOp.leakage * pg_reduction_retain; // TODO: this is a simplified version; should use the // power_gated_leakage generated in buff - router->crossbar.power.readOp.power_gated_leakage = - router->crossbar.power.readOp.leakage * pg_reduction_nonretain; - router->arbiter.power.readOp.power_gated_leakage = - router->arbiter.power.readOp.leakage * pg_reduction_nonretain; - router->power.readOp.power_gated_leakage = - router->buffer.power.readOp.power_gated_leakage + - router->crossbar.power.readOp.power_gated_leakage + - router->arbiter.power.readOp.power_gated_leakage; - - router->buffer.power.readOp.power_gated_with_long_channel_leakage = - router->buffer.power.readOp.power_gated_leakage * + router.crossbar.power.readOp.power_gated_leakage = + router.crossbar.power.readOp.leakage * pg_reduction_nonretain; + router.arbiter.power.readOp.power_gated_leakage = + router.arbiter.power.readOp.leakage * pg_reduction_nonretain; + router.power.readOp.power_gated_leakage = + router.buffer.power.readOp.power_gated_leakage + + router.crossbar.power.readOp.power_gated_leakage + + router.arbiter.power.readOp.power_gated_leakage; + + router.buffer.power.readOp.power_gated_with_long_channel_leakage = + router.buffer.power.readOp.power_gated_leakage * long_channel_device_reduction; // TODO: this is a simplified version; // should use the power_gated_leakage // generated in buff - router->crossbar.power.readOp.power_gated_with_long_channel_leakage = - router->crossbar.power.readOp.power_gated_leakage * + router.crossbar.power.readOp.power_gated_with_long_channel_leakage = + router.crossbar.power.readOp.power_gated_leakage * 
long_channel_device_reduction; - router->arbiter.power.readOp.power_gated_with_long_channel_leakage = - router->arbiter.power.readOp.power_gated_leakage * + router.arbiter.power.readOp.power_gated_with_long_channel_leakage = + router.arbiter.power.readOp.power_gated_leakage * long_channel_device_reduction; - router->power.readOp.power_gated_with_long_channel_leakage = - router->buffer.power.readOp.power_gated_with_long_channel_leakage + - router->crossbar.power.readOp.power_gated_with_long_channel_leakage + - router->arbiter.power.readOp.power_gated_with_long_channel_leakage; + router.power.readOp.power_gated_with_long_channel_leakage = + router.buffer.power.readOp.power_gated_with_long_channel_leakage + + router.crossbar.power.readOp.power_gated_with_long_channel_leakage + + router.arbiter.power.readOp.power_gated_with_long_channel_leakage; router_exist = true; } -void NoC ::init_link_bus(double link_len_) { - +void NoC::init_link_bus(double link_len_) { // if (nocdynp.min_ports==1 ) - if (nocdynp.type) + if (nocdynp.type) { link_name = "Links"; - else + } + else { link_name = "Bus"; + } link_len = link_len_; assert(link_len > 0); @@ -165,8 +174,9 @@ void NoC ::init_link_bus(double link_len_) { link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes) / 2; - if (nocdynp.total_nodes > 1) + if (nocdynp.total_nodes > 1) { link_len /= 2; // All links are shared by neighbors + } link_bus = new interconnect(name, Uncore_device, 1, @@ -186,84 +196,102 @@ void NoC ::init_link_bus(double link_len_) { link_bus_tot_per_Router.area.get_area() * nocdynp.total_nodes); link_bus_exist = true; } -void NoC::computeEnergy(bool is_tdp) { - // power_point_product_masks + +void NoC::computeArea() { + if (nocdynp.type) { + /* + * if NOC compute router, router links must be computed + * separately and called from external since total chip + * area must be known first + */ + init_router(); + } else { + init_link_bus(link_len); // if bus compute bus + } + set_area = true; +} + +void 
NoC::set_stats(const ParseXML* XML) { + total_accesses = XML->sys.NoC[ithNoC].total_accesses; + init_stats = true; +} + +void NoC::computePower() { double pppm_t[4] = {1, 1, 1, 1}; double M = nocdynp.duty_cycle; - if (is_tdp) { - // init stats for TDP - stats_t.readAc.access = M; - tdp_stats = stats_t; - if (router_exist) { - set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern - router->power = router->power * pppm_t; + // init stats for TDP + stats_t.readAc.access = M; + tdp_stats = stats_t; + if (router_exist) { + set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern + router.power = router.power * pppm_t; + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); + power = power + router.power * pppm_t; + } + if (link_bus_exist) { + if (nocdynp.type) set_pppm(pppm_t, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); - power = power + router->power * pppm_t; - } - if (link_bus_exist) { - if (nocdynp.type) - set_pppm(pppm_t, - 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), - nocdynp.global_linked_ports, - nocdynp.global_linked_ports, - nocdynp.global_linked_ports); - // reset traffic pattern; local port do not have router links - else - set_pppm(pppm_t, - 1 * M_traffic_pattern * M * (nocdynp.min_ports), - nocdynp.global_linked_ports, - nocdynp.global_linked_ports, - nocdynp.global_linked_ports); // reset traffic pattern - - link_bus_tot_per_Router.power = link_bus->power * pppm_t; - + 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), + nocdynp.global_linked_ports, + nocdynp.global_linked_ports, + nocdynp.global_linked_ports); + // reset traffic pattern; local port do not have router links + else set_pppm(pppm_t, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); - power = power + link_bus_tot_per_Router.power * pppm_t; - } - } else { - // init stats for runtime power (RTP) - stats_t.readAc.access = 
XML->sys.NoC[ithNoC].total_accesses; - rtp_stats = stats_t; - set_pppm(pppm_t, 1, 0, 0, 0); - if (router_exist) { - router->buffer.rt_power.readOp.dynamic = - (router->buffer.power.readOp.dynamic + - router->buffer.power.writeOp.dynamic) * - rtp_stats.readAc.access; - router->crossbar.rt_power.readOp.dynamic = - router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access; - router->arbiter.rt_power.readOp.dynamic = - router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access; - - router->rt_power = - router->rt_power + - (router->buffer.rt_power + router->crossbar.rt_power + - router->arbiter.rt_power) * - pppm_t + - router->power * pppm_lkg; // TDP power must be calculated first! - rt_power = rt_power + router->rt_power; - } - if (link_bus_exist) { - set_pppm(pppm_t, rtp_stats.readAc.access, 1, 1, rtp_stats.readAc.access); - link_bus->rt_power = link_bus->power * pppm_t; - rt_power = rt_power + link_bus->rt_power; - } + 1 * M_traffic_pattern * M * (nocdynp.min_ports), + nocdynp.global_linked_ports, + nocdynp.global_linked_ports, + nocdynp.global_linked_ports); // reset traffic pattern + + link_bus_tot_per_Router.power = link_bus->power * pppm_t; + + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); + power = power + link_bus_tot_per_Router.power * pppm_t; + } +} + +void NoC::computeRuntimeDynamicPower() { + double pppm_t[4] = {1, 1, 1, 1}; + // init stats for runtime power (RTP) + stats_t.readAc.access = total_accesses; + rtp_stats = stats_t; + set_pppm(pppm_t, 1, 0, 0, 0); + if (router_exist) { + router.buffer.rt_power.readOp.dynamic = + (router.buffer.power.readOp.dynamic + + router.buffer.power.writeOp.dynamic) * + rtp_stats.readAc.access; + router.crossbar.rt_power.readOp.dynamic = + router.crossbar.power.readOp.dynamic * rtp_stats.readAc.access; + router.arbiter.rt_power.readOp.dynamic = + router.arbiter.power.readOp.dynamic * rtp_stats.readAc.access; + + router.rt_power = + router.rt_power + 
+ (router.buffer.rt_power + router.crossbar.rt_power + + router.arbiter.rt_power) * + pppm_t + + router.power * pppm_lkg; // TDP power must be calculated first! + rt_power = rt_power + router.rt_power; + } + if (link_bus_exist) { + set_pppm(pppm_t, rtp_stats.readAc.access, 1, 1, rtp_stats.readAc.access); + link_bus->rt_power = link_bus->power * pppm_t; + rt_power = rt_power + link_bus->rt_power; } } void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); - bool long_channel = XML->sys.longer_channel_device; - bool power_gating = XML->sys.power_gating; double M = M_traffic_pattern * nocdynp.duty_cycle; /*only router as a whole has been applied the M_traffic_pattern(0.6 by @@ -297,111 +325,111 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { if (router_exist) { cout << indent_str << "Router: " << endl; - cout << indent_str_next << "Area = " << router->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << router.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " - << router->power.readOp.dynamic * nocdynp.clockRate << " W" << endl; + << router.power.readOp.dynamic * nocdynp.clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? router->power.readOp.longer_channel_leakage - : router->power.readOp.leakage) + << (long_channel ? router.power.readOp.longer_channel_leakage + : router.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? router->power.readOp.power_gated_with_long_channel_leakage - : router->power.readOp.power_gated_leakage) + ? 
router.power.readOp.power_gated_with_long_channel_leakage + : router.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << router.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << router->rt_power.readOp.dynamic / nocdynp.executionTime << " W" + << router.rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; cout << endl; if (plevel > 2) { cout << indent_str << indent_str << "Virtual Channel Buffer:" << endl; cout << indent_str << indent_str_next << "Area = " - << router->buffer.area.get_area() * 1e-6 * nocdynp.input_ports + << router.buffer.area.get_area() * 1e-6 * nocdynp.input_ports << " mm^2" << endl; cout << indent_str << indent_str_next << "Peak Dynamic = " - << (router->buffer.power.readOp.dynamic + - router->buffer.power.writeOp.dynamic) * + << (router.buffer.power.readOp.dynamic + + router.buffer.power.writeOp.dynamic) * nocdynp.min_ports * M * nocdynp.clockRate << " W" << endl; cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? router->buffer.power.readOp.longer_channel_leakage * + ? router.buffer.power.readOp.longer_channel_leakage * nocdynp.input_ports - : router->buffer.power.readOp.leakage * + : router.buffer.power.readOp.leakage * nocdynp.input_ports) << " W" << endl; if (power_gating) cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? router->buffer.power.readOp + ? 
router.buffer.power.readOp .power_gated_with_long_channel_leakage - : router->buffer.power.readOp.power_gated_leakage) + : router.buffer.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str << indent_str_next << "Gate Leakage = " - << router->buffer.power.readOp.gate_leakage * nocdynp.input_ports + << router.buffer.power.readOp.gate_leakage * nocdynp.input_ports << " W" << endl; cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router->buffer.rt_power.readOp.dynamic / nocdynp.executionTime + << router.buffer.rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; cout << endl; cout << indent_str << indent_str << "Crossbar:" << endl; cout << indent_str << indent_str_next - << "Area = " << router->crossbar.area.get_area() * 1e-6 << " mm^2" + << "Area = " << router.crossbar.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str << indent_str_next << "Peak Dynamic = " - << router->crossbar.power.readOp.dynamic * nocdynp.clockRate * + << router.crossbar.power.readOp.dynamic * nocdynp.clockRate * nocdynp.min_ports * M << " W" << endl; cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? router->crossbar.power.readOp.longer_channel_leakage - : router->crossbar.power.readOp.leakage) + ? router.crossbar.power.readOp.longer_channel_leakage + : router.crossbar.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? router->crossbar.power.readOp + ? 
router.crossbar.power.readOp .power_gated_with_long_channel_leakage - : router->crossbar.power.readOp.power_gated_leakage) + : router.crossbar.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str << indent_str_next - << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage + << "Gate Leakage = " << router.crossbar.power.readOp.gate_leakage << " W" << endl; cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router->crossbar.rt_power.readOp.dynamic / nocdynp.executionTime + << router.crossbar.rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; cout << endl; cout << indent_str << indent_str << "Arbiter:" << endl; cout << indent_str << indent_str_next << "Peak Dynamic = " - << router->arbiter.power.readOp.dynamic * nocdynp.clockRate * + << router.arbiter.power.readOp.dynamic * nocdynp.clockRate * nocdynp.min_ports * M << " W" << endl; cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel - ? router->arbiter.power.readOp.longer_channel_leakage - : router->arbiter.power.readOp.leakage) + ? router.arbiter.power.readOp.longer_channel_leakage + : router.arbiter.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? router->arbiter.power.readOp + ? 
router.arbiter.power.readOp .power_gated_with_long_channel_leakage - : router->arbiter.power.readOp.power_gated_leakage) + : router.arbiter.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str << indent_str_next - << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage + << "Gate Leakage = " << router.arbiter.power.readOp.gate_leakage << " W" << endl; cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router->arbiter.rt_power.readOp.dynamic / nocdynp.executionTime + << router.arbiter.rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; cout << endl; } @@ -467,8 +495,7 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } -void NoC::set_noc_param() { - +void NoC::set_noc_param(const ParseXML* XML) { nocdynp.type = XML->sys.NoC[ithNoC].type; nocdynp.clockRate = XML->sys.NoC[ithNoC].clockrate; nocdynp.clockRate *= 1e6; @@ -533,11 +560,6 @@ void NoC::set_noc_param() { } NoC ::~NoC() { - - if (router) { - delete router; - router = 0; - } if (link_bus) { delete link_bus; link_bus = 0; diff --git a/src/noc.h b/src/noc.h index 3367bc2..df8f08e 100644 --- a/src/noc.h +++ b/src/noc.h @@ -41,13 +41,14 @@ class NoC : public Component { public: - const ParseXML *XML; int ithNoC; InputParameter interface_ip; double link_len; double executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - Router *router; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + Router router; interconnect *link_bus; NoCParam nocdynp; uca_org_t local_result; @@ -60,22 +61,37 @@ class NoC : public Component { bool router_exist; string name, link_name; double M_traffic_pattern; - NoC(const ParseXML *XML_interface, - int ithNoC_, - InputParameter *interface_ip_, - double M_traffic_pattern_ = 0.6, - double link_len_ = 0); - void set_noc_param(); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + NoC(); + void 
set_params(const ParseXML *XML, + int ithNoC_, + InputParameter *interface_ip_, + double M_traffic_pattern_ = 0.6, + double link_len_ = 0); + void set_stats(const ParseXML* XML); + void computeArea(); + void computePower(); + void computeRuntimeDynamicPower(); void init_link_bus(double link_len_); - void init_router(); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); // TODO void computeEnergy_link_bus(bool is_tdp = true); void displayEnergy_link_bus(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~NoC(); + +private: + bool embedded; + bool init_stats; + bool init_params; + bool set_area; + bool long_channel; + bool power_gating; + + unsigned int total_accesses; + + void set_noc_param(const ParseXML* XML); + void init_router(); }; #endif /* NOC_H_ */ diff --git a/src/processor.cc b/src/processor.cc index ab4992e..04b4b27 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -428,7 +428,10 @@ Processor::Processor(ParseXML *XML_interface) if (numNOC > 0) { for (i = 0; i < numNOC; i++) { if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used - nocs.push_back(new NoC(XML, i, &interface_ip, 1)); + nocs.push_back(new NoC()); + nocs[i]->set_params(XML, i, &interface_ip, 1); + nocs[i]->set_stats(XML); + nocs[i]->computeArea(); if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area() * procdynp.numNOC); @@ -438,12 +441,11 @@ Processor::Processor(ParseXML *XML_interface) area.set_area(area.get_area() + nocs[i]->area.get_area()); } } else { // Bus based interconnect - nocs.push_back( - new NoC(XML, - i, - &interface_ip, - 1, - sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage))); + nocs.push_back(new NoC()); + nocs[i]->set_params(XML, i, &interface_ip, 1, + sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); + nocs[i]->set_stats(XML); + nocs[i]->computeArea(); if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area() * procdynp.numNOC); @@ 
-484,8 +486,8 @@ Processor::Processor(ParseXML *XML_interface) } // Compute energy of NoC (w or w/o links) or buses for (i = 0; i < numNOC; i++) { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); + nocs[i]->computePower(); + nocs[i]->computeRuntimeDynamicPower(); if (procdynp.homoNOC) { set_pppm(pppm_t, procdynp.numNOC * nocs[i]->nocdynp.clockRate, From 852f4b7d4a65201df6c7f9eac275262f3b0752fb Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 21 Jun 2020 01:01:10 -0500 Subject: [PATCH 32/59] refactor: Interconnect updated Updated interconnect to reflect the new interface --- src/core/exec_unit.cc | 586 ++++++++++++++++------------------- src/core/exec_unit.h | 12 +- src/interconnect.cc | 145 +++++---- src/interconnect.h | 39 +-- src/logic/functional_unit.cc | 15 +- src/noc.cc | 62 ++-- src/noc.h | 6 +- src/processor.cc | 8 +- 8 files changed, 433 insertions(+), 440 deletions(-) diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 06e6d6f..9798c7f 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -49,8 +49,8 @@ EXECU::EXECU(const ParseXML *XML_interface, const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), int_bypass(0), intTagBypass(0), int_mul_bypass(0), - intTag_mul_Bypass(0), fp_bypass(0), fpTagBypass(0), exist(exist_) { + lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), + exist(exist_) { bool exist_flag = true; if (!exist) { return; @@ -107,101 +107,96 @@ EXECU::EXECU(const ParseXML *XML_interface, } if (coredynp.core_ty == Inorder) { - int_bypass = - new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32), - rfu->int_regfile_height + exeu.FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new 
interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->int_regfile_height + exeu.FU_height + - lsq_height + scheu->Iw_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); + int_bypass.init("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32), + rfu->int_regfile_height + exeu.FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); + intTagBypass.init("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->int_regfile_height + exeu.FU_height + lsq_height + + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + intTagBypass.area.get_area()); if (coredynp.num_muls > 0) { - int_mul_bypass = - new interconnect("Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + exeu.FU_height + - mul.FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_mul_bypass.init("Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); - intTag_mul_Bypass = - new interconnect("Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->fp_regfile_height + exeu.FU_height + - mul.FU_height + lsq_height + scheu->Iw_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_mul_bypass.area.get_area()); + 
intTag_mul_Bypass.init("Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->fp_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); + intTag_mul_Bypass.area.get_area()); } if (coredynp.num_fpus > 0) { - fp_bypass = - new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + fp_u.FU_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag", - Core_device, - 1, - 1, - coredynp.perThreadState, - rfu->fp_regfile_height + fp_u.FU_height + - lsq_height + scheu->Iw_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + fp_bypass.init("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), + rfu->fp_regfile_height + fp_u.FU_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + fp_bypass.area.get_area()); + fpTagBypass.init("FP Bypass tag", + Core_device, + 1, + 1, + coredynp.perThreadState, + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + + scheu->Iw_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); + fpTagBypass.area.get_area()); } } else { // OOO if (coredynp.scheu_ty == PhysicalRegFile) { @@ -210,210 +205,200 @@ EXECU::EXECU(const ParseXML *XML_interface, * windows and register files, while tag broadcast interconnects also * cover across ROB */ - int_bypass = new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - 
int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + - lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_bypass.init("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu.FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); + intTagBypass.init("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu.FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + - exeu.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); + intTagBypass.area.get_area()); if (coredynp.num_muls > 0) { - int_mul_bypass = - new interconnect("Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + - mul.FU_height + lsq_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + mul.FU_height + - lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_mul_bypass.init("Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu.FU_height + 
+ mul.FU_height + lsq_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTag_mul_Bypass.init("Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); + int_mul_bypass.area.get_area()); bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); + intTag_mul_Bypass.area.get_area()); } if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u.FU_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - fpTagBypass = new interconnect( - "FP Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + fp_bypass.init("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u.FU_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + fpTagBypass.init("FP Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - fp_bypass->area.get_area()); + fp_bypass.area.get_area()); bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); + fpTagBypass.area.get_area()); } } else { /* * In RS based processor both data and tag are broadcast together, * covering 
functional units, lsq, nst windows, register files, and ROBs */ - int_bypass = new interconnect("Int Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + - lsq_height + scheu->Iw_height + - scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + - exeu.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_bypass.init("Int Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu.FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTagBypass.init("Int Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu.FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); bypass.area.set_area(bypass.area.get_area() + - int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() + - intTagBypass->area.get_area()); + intTagBypass.area.get_area()); if (coredynp.num_muls > 0) { - int_mul_bypass = new interconnect( - "Mul Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + mul.FU_height + - lsq_height + scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - intTag_mul_Bypass = new interconnect( - "Mul Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + mul.FU_height + - lsq_height + 
scheu->Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + int_mul_bypass.init("Mul Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.int_data_width)), + rfu->int_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + scheu->Iw_height + + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + intTag_mul_Bypass.init("Mul Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_ireg_width, + rfu->int_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + + scheu->Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - int_mul_bypass->area.get_area()); + int_mul_bypass.area.get_area()); bypass.area.set_area(bypass.area.get_area() + - intTag_mul_Bypass->area.get_area()); + intTag_mul_Bypass.area.get_area()); } if (coredynp.num_fpus > 0) { - fp_bypass = new interconnect("FP Bypass Data", - Core_device, - 1, - 1, - int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u.FU_height + - lsq_height + scheu->fp_Iw_height + - scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); - fpTagBypass = new interconnect( - "FP Bypass tag", - Core_device, - 1, - 1, - coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, - &interface_ip, - 3, - false, - 1.0, - coredynp.opt_local, - coredynp.core_ty); + fp_bypass.init("FP Bypass Data", + Core_device, + 1, + 1, + int(ceil(coredynp.fp_data_width)), + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + + scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); + fpTagBypass.init("FP Bypass tag", + Core_device, + 1, + 1, + coredynp.phy_freg_width, + rfu->fp_regfile_height + fp_u.FU_height + lsq_height + + 
scheu->fp_Iw_height + scheu->ROB_height, + &interface_ip, + 3, + false, + 1.0, + coredynp.opt_local, + coredynp.core_ty); bypass.area.set_area(bypass.area.get_area() + - fp_bypass->area.get_area()); + fp_bypass.area.get_area()); bypass.area.set_area(bypass.area.get_area() + - fpTagBypass->area.get_area()); + fpTagBypass.area.get_area()); } } } @@ -433,25 +418,22 @@ void EXECU::computeEnergy(bool is_tdp) { rfu->computeDynamicPower(is_tdp); scheu->computeDynamicPower(is_tdp); - if(is_tdp) { + if (is_tdp) { exeu.computePower(); - } - else { + } else { exeu.computeRuntimeDynamicPower(); } if (coredynp.num_fpus > 0) { - if(is_tdp) { + if (is_tdp) { fp_u.computePower(); - } - else { + } else { fp_u.computeRuntimeDynamicPower(); } } if (coredynp.num_muls > 0) { - if(is_tdp) { + if (is_tdp) { mul.computePower(); - } - else { + } else { mul.computeRuntimeDynamicPower(); } } @@ -465,8 +447,8 @@ void EXECU::computeEnergy(bool is_tdp) { 2 * coredynp .ALU_cdb_duty_cycle); // 2 means two source operands needs to be // passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power * pppm_t + - int_bypass->power * pppm_t; + bypass.power = + bypass.power + intTagBypass.power * pppm_t + int_bypass.power * pppm_t; if (coredynp.num_muls > 0) { set_pppm( pppm_t, @@ -476,8 +458,8 @@ void EXECU::computeEnergy(bool is_tdp) { 2 * coredynp .MUL_cdb_duty_cycle); // 2 means two source operands needs to // be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power * pppm_t + - int_mul_bypass->power * pppm_t; + bypass.power = bypass.power + intTag_mul_Bypass.power * pppm_t + + int_mul_bypass.power * pppm_t; power = power + mul.power; } if (coredynp.num_fpus > 0) { @@ -489,8 +471,8 @@ void EXECU::computeEnergy(bool is_tdp) { 3 * coredynp .FPU_cdb_duty_cycle); // 3 means three source operands needs // to be passed for each fp instruction. 
- bypass.power = bypass.power + fp_bypass->power * pppm_t + - fpTagBypass->power * pppm_t; + bypass.power = + bypass.power + fp_bypass.power * pppm_t + fpTagBypass.power * pppm_t; power = power + fp_u.power; } @@ -501,8 +483,8 @@ void EXECU::computeEnergy(bool is_tdp) { 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power * pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + intTagBypass.power * pppm_t; + bypass.rt_power = bypass.rt_power + int_bypass.power * pppm_t; if (coredynp.num_muls > 0) { set_pppm(pppm_t, @@ -512,8 +494,8 @@ void EXECU::computeEnergy(bool is_tdp) { XML->sys.core[ithCore] .cdb_mul_accesses); // 2 means two source operands needs to // be passed for each int instruction. - bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power * pppm_t + - int_mul_bypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + intTag_mul_Bypass.power * pppm_t + + int_mul_bypass.power * pppm_t; rt_power = rt_power + mul.rt_power; } @@ -523,8 +505,8 @@ void EXECU::computeEnergy(bool is_tdp) { 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power * pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power * pppm_t; + bypass.rt_power = bypass.rt_power + fp_bypass.power * pppm_t; + bypass.rt_power = bypass.rt_power + fpTagBypass.power * pppm_t; rt_power = rt_power + fp_u.rt_power; } rt_power = rt_power + rfu->rt_power + exeu.rt_power + bypass.rt_power + @@ -646,32 +628,8 @@ void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } EXECU ::~EXECU() { - - if (!exist) + if (!exist) { return; - if (int_bypass) { - delete int_bypass; - int_bypass = 0; - } - if (intTagBypass) { - delete intTagBypass; - intTagBypass = 0; - } - if (int_mul_bypass) { - delete int_mul_bypass; - int_mul_bypass = 0; - } - if (intTag_mul_Bypass) { - delete intTag_mul_Bypass; - intTag_mul_Bypass = 0; - } - if 
(fp_bypass) { - delete fp_bypass; - fp_bypass = 0; - } - if (fpTagBypass) { - delete fpTagBypass; - fpTagBypass = 0; } if (rfu) { delete rfu; diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index 75c40dd..5334ff5 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -58,12 +58,12 @@ class EXECU : public Component { FunctionalUnit fp_u; FunctionalUnit exeu; FunctionalUnit mul; - interconnect *int_bypass; - interconnect *intTagBypass; - interconnect *int_mul_bypass; - interconnect *intTag_mul_Bypass; - interconnect *fp_bypass; - interconnect *fpTagBypass; + interconnect int_bypass; + interconnect intTagBypass; + interconnect int_mul_bypass; + interconnect intTag_mul_Bypass; + interconnect fp_bypass; + interconnect fpTagBypass; Component bypass; bool exist; diff --git a/src/interconnect.cc b/src/interconnect.cc index 29015e3..5db5252 100644 --- a/src/interconnect.cc +++ b/src/interconnect.cc @@ -37,30 +37,67 @@ #include #include -interconnect::interconnect(string name_, - enum Device_ty device_ty_, - double base_w, - double base_h, - int data_w, - double len, - const InputParameter *configure_interface, - int start_wiring_level_, - bool pipelinable_, - double route_over_perc_, - bool opt_local_, - enum Core_type core_ty_, - enum Wire_type wire_model, - double width_s, - double space_s, - TechnologyParameter::DeviceType *dt) - : name(name_), device_ty(device_ty_), in_rise_time(0), out_rise_time(0), - base_width(base_w), base_height(base_h), data_width(data_w), - wt(wire_model), width_scaling(width_s), space_scaling(space_s), - start_wiring_level(start_wiring_level_), length(len), - // interconnect_latency(1e-12), - // interconnect_throughput(1e-12), - opt_local(opt_local_), core_ty(core_ty_), pipelinable(pipelinable_), - route_over_perc(route_over_perc_), deviceType(dt) { +interconnect::interconnect() { + name = ""; + in_rise_time = 0.0; + out_rise_time = 0.0; + max_unpipelined_link_delay = 0.0; + wire_bw = 0.0; + init_wire_bw = 0.0; // bus width at 
root + base_width = 0.0; + base_height = 0.0; + data_width = 0; + width_scaling = 0.0; + space_scaling = 0.0; + start_wiring_level = 0; + length = 0.0; + min_w_nmos = 0.0; + min_w_pmos = 0.0; + latency = 0.0; + throughput = 0.0; + latency_overflow = false; + throughput_overflow = false; + interconnect_latency = 0.0; + interconnect_throughput = 0.0; + opt_local = false; + pipelinable = false; + route_over_perc = 0.0; + num_pipe_stages = 0; +} + +void interconnect::init(string name_, + enum Device_ty device_ty_, + double base_w, + double base_h, + int data_w, + double len, + const InputParameter *configure_interface, + int start_wiring_level_, + bool pipelinable_, + double route_over_perc_, + bool opt_local_, + enum Core_type core_ty_, + enum Wire_type wire_model, + double width_s, + double space_s, + TechnologyParameter::DeviceType *dt) { + name = name_; + device_ty = device_ty_; + in_rise_time = 0; + out_rise_time = 0; + base_width = base_w; + base_height = base_h; + data_width = data_w; + wt = wire_model; + width_scaling = width_s; + space_scaling = space_s; + start_wiring_level = start_wiring_level_; + length = len; + opt_local = opt_local_; + core_ty = core_ty_; + pipelinable = pipelinable_; + route_over_perc = route_over_perc_; + deviceType = dt; wt = Global; l_ip = *configure_interface; @@ -163,7 +200,6 @@ interconnect::interconnect(string name_, } void interconnect::compute() { - Wire *wtemp1 = 0; wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); delay = wtemp1->delay; @@ -175,35 +211,36 @@ void interconnect::compute() { no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); no_device_under_wire_area.w = length; - if (wtemp1) + if (wtemp1) { delete wtemp1; + } } -void interconnect::leakage_feedback( - double temperature) // TODO: add code for processing power gating -{ - l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy - - compute(); - - 
power_bit = power; - power.readOp.dynamic *= data_width; - power.readOp.leakage *= data_width; - power.readOp.gate_leakage *= data_width; - - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); - - double long_channel_device_reduction = - longer_channel_device_reduction(device_ty, core_ty); - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - power.readOp.longer_channel_leakage = - power.readOp.leakage * long_channel_device_reduction; -} +// void interconnect::leakage_feedback( +// double temperature) // TODO: add code for processing power gating +//{ +// l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; +// uca_org_t init_result = init_interface(&l_ip); // init_result is dummy +// +// compute(); +// +// power_bit = power; +// power.readOp.dynamic *= data_width; +// power.readOp.leakage *= data_width; +// power.readOp.gate_leakage *= data_width; +// +// assert(power.readOp.dynamic > 0); +// assert(power.readOp.leakage > 0); +// assert(power.readOp.gate_leakage > 0); +// +// double long_channel_device_reduction = +// longer_channel_device_reduction(device_ty, core_ty); +// +// double sckRation = g_tp.sckt_co_eff; +// power.readOp.dynamic *= sckRation; +// power.writeOp.dynamic *= sckRation; +// power.searchOp.dynamic *= sckRation; +// +// power.readOp.longer_channel_leakage = +// power.readOp.leakage * long_channel_device_reduction; +//} diff --git a/src/interconnect.h b/src/interconnect.h index faf173f..e5ae11c 100644 --- a/src/interconnect.h +++ b/src/interconnect.h @@ -46,26 +46,25 @@ class interconnect : public Component { public: - interconnect(string name_, - enum Device_ty device_ty_, - double base_w, - double base_h, - int data_w, - double len, - const InputParameter *configure_interface, - int start_wiring_level_, - bool pipelinable_ = false, - double route_over_perc_ = 0.5, - bool 
opt_local_ = true, - enum Core_type core_ty_ = Inorder, - enum Wire_type wire_model = Global, - double width_s = 1.0, - double space_s = 1.0, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); - + interconnect(); + void init(string name_, + enum Device_ty device_ty_, + double base_w, + double base_h, + int data_w, + double len, + const InputParameter *configure_interface, + int start_wiring_level_, + bool pipelinable_ = false, + double route_over_perc_ = 0.5, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, + enum Wire_type wire_model = Global, + double width_s = 1.0, + double space_s = 1.0, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~interconnect(){}; - void compute(); string name; enum Device_ty device_ty; double in_rise_time, out_rise_time; @@ -74,7 +73,7 @@ class interconnect : public Component { Area no_device_under_wire_area; void set_in_rise_time(double rt) { in_rise_time = rt; } - void leakage_feedback(double temperature); + // void leakage_feedback(double temperature); double max_unpipelined_link_delay; powerDef power_bit; @@ -101,6 +100,8 @@ class interconnect : public Component { int num_pipe_stages; private: + void compute(); + TechnologyParameter::DeviceType *deviceType; }; diff --git a/src/logic/functional_unit.cc b/src/logic/functional_unit.cc index 4c22987..49c4b1f 100644 --- a/src/logic/functional_unit.cc +++ b/src/logic/functional_unit.cc @@ -112,8 +112,8 @@ void FunctionalUnit::computeArea() { FU_height = (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data } else { - std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << - std::endl; + std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" + << std::endl; exit(0); } } else { @@ -144,8 +144,8 @@ void FunctionalUnit::computeArea() { FU_height = (9334 * num_fu) * interface_ip.F_sz_um; // divider/mul from Sun's data } else { - std::cout << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << - 
std::endl; + std::cout << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" + << std::endl; exit(1); } } @@ -419,7 +419,8 @@ void FunctionalUnit::computeLeakage() { 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; ////This is per cycle energy(nJ), coefficient based on Wattch } else { - std::cerr << "[ FunctionalUnit ]Unknown Functional Unit Type" << std::endl; + std::cerr << "[ FunctionalUnit ]Unknown Functional Unit Type" + << std::endl; exit(0); } per_access_energy *= 0.5; // According to ARM data embedded processor has @@ -503,8 +504,8 @@ void FunctionalUnit::computeLeakage() { 90.0); //(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; ////This is per cycle energy(nJ), coefficient based on Wattch } else { - std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" << - std::endl; + std::cerr << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" + << std::endl; exit(1); } } diff --git a/src/noc.cc b/src/noc.cc index 37d4a1b..acc9158 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -50,7 +50,6 @@ NoC::NoC() { set_area = false; router_exist = false; link_bus_exist = false; - link_bus = nullptr; M_traffic_pattern = 0.0; link_len = 0.0; ithNoC = 0; @@ -106,8 +105,7 @@ void NoC::init_router() { nocdynp.output_ports, M_traffic_pattern); // router.print_router(); - area.set_area(area.get_area() + - router.area.get_area() * nocdynp.total_nodes); + area.set_area(area.get_area() + router.area.get_area() * nocdynp.total_nodes); double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); @@ -161,8 +159,7 @@ void NoC::init_link_bus(double link_len_) { // if (nocdynp.min_ports==1 ) if (nocdynp.type) { link_name = "Links"; - } - else { + } else { link_name = "Bus"; } @@ -177,20 +174,20 @@ void NoC::init_link_bus(double link_len_) { if (nocdynp.total_nodes > 1) { link_len /= 2; // All links are shared by neighbors } - link_bus = new interconnect(name, - Uncore_device, - 1, - 1, - 
nocdynp.flit_size, - link_len, - &interface_ip, - 3, - true /*pipelinable*/, - nocdynp.route_over_perc); + link_bus.init(name, + Uncore_device, + 1, + 1, + nocdynp.flit_size, + link_len, + &interface_ip, + 3, + true /*pipelinable*/, + nocdynp.route_over_perc); link_bus_tot_per_Router.area.set_area( link_bus_tot_per_Router.area.get_area() + - link_bus->area.get_area() * nocdynp.global_linked_ports); + link_bus.area.get_area() * nocdynp.global_linked_ports); area.set_area(area.get_area() + link_bus_tot_per_Router.area.get_area() * nocdynp.total_nodes); @@ -198,7 +195,7 @@ void NoC::init_link_bus(double link_len_) { } void NoC::computeArea() { - if (nocdynp.type) { + if (nocdynp.type) { /* * if NOC compute router, router links must be computed * separately and called from external since total chip @@ -211,7 +208,7 @@ void NoC::computeArea() { set_area = true; } -void NoC::set_stats(const ParseXML* XML) { +void NoC::set_stats(const ParseXML *XML) { total_accesses = XML->sys.NoC[ithNoC].total_accesses; init_stats = true; } @@ -247,7 +244,7 @@ void NoC::computePower() { nocdynp.global_linked_ports, nocdynp.global_linked_ports); // reset traffic pattern - link_bus_tot_per_Router.power = link_bus->power * pppm_t; + link_bus_tot_per_Router.power = link_bus.power * pppm_t; set_pppm(pppm_t, nocdynp.total_nodes, @@ -284,8 +281,8 @@ void NoC::computeRuntimeDynamicPower() { } if (link_bus_exist) { set_pppm(pppm_t, rtp_stats.readAc.access, 1, 1, rtp_stats.readAc.access); - link_bus->rt_power = link_bus->power * pppm_t; - rt_power = rt_power + link_bus->rt_power; + link_bus.rt_power = link_bus.power * pppm_t; + rt_power = rt_power + link_bus.rt_power; } } @@ -334,12 +331,11 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { : router.power.readOp.leakage) << " W" << endl; if (power_gating) - cout - << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
router.power.readOp.power_gated_with_long_channel_leakage - : router.power.readOp.power_gated_leakage) - << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? router.power.readOp.power_gated_with_long_channel_leakage + : router.power.readOp.power_gated_leakage) + << " W" << endl; cout << indent_str_next << "Gate Leakage = " << router.power.readOp.gate_leakage << " W" << endl; @@ -361,8 +357,7 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << (long_channel ? router.buffer.power.readOp.longer_channel_leakage * nocdynp.input_ports - : router.buffer.power.readOp.leakage * - nocdynp.input_ports) + : router.buffer.power.readOp.leakage * nocdynp.input_ports) << " W" << endl; if (power_gating) cout << indent_str << indent_str_next @@ -458,7 +453,7 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << link_bus->rt_power.readOp.dynamic / nocdynp.executionTime << " W" + << link_bus.rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; cout << endl; } @@ -495,7 +490,7 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } -void NoC::set_noc_param(const ParseXML* XML) { +void NoC::set_noc_param(const ParseXML *XML) { nocdynp.type = XML->sys.NoC[ithNoC].type; nocdynp.clockRate = XML->sys.NoC[ithNoC].clockrate; nocdynp.clockRate *= 1e6; @@ -560,8 +555,5 @@ void NoC::set_noc_param(const ParseXML* XML) { } NoC ::~NoC() { - if (link_bus) { - delete link_bus; - link_bus = 0; - } + // Do Nothing } diff --git a/src/noc.h b/src/noc.h index df8f08e..18907cb 100644 --- a/src/noc.h +++ b/src/noc.h @@ -49,7 +49,7 @@ class NoC : public Component { double chip_PR_overhead; double macro_PR_overhead; Router router; - interconnect *link_bus; + interconnect link_bus; NoCParam nocdynp; uca_org_t 
local_result; statsDef tdp_stats; @@ -67,7 +67,7 @@ class NoC : public Component { InputParameter *interface_ip_, double M_traffic_pattern_ = 0.6, double link_len_ = 0); - void set_stats(const ParseXML* XML); + void set_stats(const ParseXML *XML); void computeArea(); void computePower(); void computeRuntimeDynamicPower(); @@ -90,7 +90,7 @@ class NoC : public Component { unsigned int total_accesses; - void set_noc_param(const ParseXML* XML); + void set_noc_param(const ParseXML *XML); void init_router(); }; diff --git a/src/processor.cc b/src/processor.cc index 04b4b27..0725c11 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -442,8 +442,12 @@ Processor::Processor(ParseXML *XML_interface) } } else { // Bus based interconnect nocs.push_back(new NoC()); - nocs[i]->set_params(XML, i, &interface_ip, 1, - sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); + nocs[i]->set_params( + XML, + i, + &interface_ip, + 1, + sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); nocs[i]->set_stats(XML); nocs[i]->computeArea(); if (procdynp.homoNOC) { From 43e85bd2502fad36a818ba727c9f7518e0a35ce0 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 21 Jun 2020 01:48:27 -0500 Subject: [PATCH 33/59] refactor: Preparing processor class for refactor --- src/main.cc | 10 ++- src/noc.cc | 206 +++++++++++++++++++++-------------------------- src/noc.h | 2 +- src/processor.cc | 25 +++++- src/processor.h | 39 ++++++--- 5 files changed, 147 insertions(+), 135 deletions(-) diff --git a/src/main.cc b/src/main.cc index c23fac7..986113b 100644 --- a/src/main.cc +++ b/src/main.cc @@ -40,14 +40,12 @@ using namespace std; -void print_usage(char *argv0); - int main(int argc, char *argv[]) { mcpat::Options opt; - if (!opt.parse(argc, argv)) { return 1; } + opt_for_clk = opt.opt_for_clk; cout << "McPAT (version " << VER_MAJOR << "." 
<< VER_MINOR << " of " @@ -56,7 +54,11 @@ int main(int argc, char *argv[]) { // parse XML-based interface ParseXML *p1 = new ParseXML(); p1->parse(opt.input_xml); - Processor proc(p1); + Processor proc; + proc.init(p1); + proc.computeArea(); + proc.computePower(); + proc.computeRuntimeDynamicPower(); proc.displayEnergy(2, opt.print_level); delete p1; return 0; diff --git a/src/noc.cc b/src/noc.cc index acc9158..5383b37 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -286,7 +286,7 @@ void NoC::computeRuntimeDynamicPower() { } } -void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +void NoC::display(uint32_t indent, int plevel, bool is_tdp) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); @@ -297,196 +297,170 @@ void NoC::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { * with McPAT's extra traffic pattern. * */ if (is_tdp) { - cout << name << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str + std::cout << name << std::endl; + std::cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic * nocdynp.clockRate - << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " + << " W" << std::endl; + std::cout << indent_str << "Subthreshold Leakage = " << (long_channel ? power.readOp.longer_channel_leakage : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str << "Subthreshold Leakage with power gating = " << (power.readOp.power_gated_leakage * (long_channel ? 
power.readOp.longer_channel_leakage / power.readOp.leakage : 1)) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str << "Runtime Dynamic = " - << rt_power.readOp.dynamic / nocdynp.executionTime << " W" << endl; - cout << endl; + << " W" << std::endl; + } + std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str << "Runtime Dynamic = " + << rt_power.readOp.dynamic / nocdynp.executionTime << " W" << std::endl; + std::cout << std::endl; if (router_exist) { - cout << indent_str << "Router: " << endl; - cout << indent_str_next << "Area = " << router.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " - << router.power.readOp.dynamic * nocdynp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " + std::cout << indent_str << "Router: " << std::endl; + std::cout << indent_str_next << "Area = " << router.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next << "Peak Dynamic = " + << router.power.readOp.dynamic * nocdynp.clockRate << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " << (long_channel ? router.power.readOp.longer_channel_leakage : router.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
router.power.readOp.power_gated_with_long_channel_leakage : router.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next + << " W" << std::endl; + } + std::cout << indent_str_next << "Gate Leakage = " << router.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next << "Runtime Dynamic = " + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " << router.rt_power.readOp.dynamic / nocdynp.executionTime << " W" - << endl; - cout << endl; + << std::endl; + std::cout << std::endl; if (plevel > 2) { - cout << indent_str << indent_str << "Virtual Channel Buffer:" << endl; - cout << indent_str << indent_str_next << "Area = " + std::cout << indent_str << indent_str << "Virtual Channel Buffer:" << std::endl; + std::cout << indent_str << indent_str_next << "Area = " << router.buffer.area.get_area() * 1e-6 * nocdynp.input_ports - << " mm^2" << endl; - cout << indent_str << indent_str_next << "Peak Dynamic = " + << " mm^2" << std::endl; + std::cout << indent_str << indent_str_next << "Peak Dynamic = " << (router.buffer.power.readOp.dynamic + router.buffer.power.writeOp.dynamic) * nocdynp.min_ports * M * nocdynp.clockRate - << " W" << endl; - cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel ? router.buffer.power.readOp.longer_channel_leakage * nocdynp.input_ports : router.buffer.power.readOp.leakage * nocdynp.input_ports) - << " W" << endl; - if (power_gating) - cout << indent_str << indent_str_next + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
router.buffer.power.readOp .power_gated_with_long_channel_leakage : router.buffer.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << indent_str_next << "Gate Leakage = " + << " W" << std::endl; + } + std::cout << indent_str << indent_str_next << "Gate Leakage = " << router.buffer.power.readOp.gate_leakage * nocdynp.input_ports - << " W" << endl; - cout << indent_str << indent_str_next << "Runtime Dynamic = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Runtime Dynamic = " << router.buffer.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << endl; - cout << endl; - cout << indent_str << indent_str << "Crossbar:" << endl; - cout << indent_str << indent_str_next + << " W" << std::endl; + std::cout << std::endl; + std::cout << indent_str << indent_str << "Crossbar:" << std::endl; + std::cout << indent_str << indent_str_next << "Area = " << router.crossbar.area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str << indent_str_next << "Peak Dynamic = " + << std::endl; + std::cout << indent_str << indent_str_next << "Peak Dynamic = " << router.crossbar.power.readOp.dynamic * nocdynp.clockRate * nocdynp.min_ports * M - << " W" << endl; - cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel ? router.crossbar.power.readOp.longer_channel_leakage : router.crossbar.power.readOp.leakage) - << " W" << endl; + << " W" << std::endl; if (power_gating) - cout << indent_str << indent_str_next + std::cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
router.crossbar.power.readOp .power_gated_with_long_channel_leakage : router.crossbar.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << indent_str_next + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Gate Leakage = " << router.crossbar.power.readOp.gate_leakage - << " W" << endl; - cout << indent_str << indent_str_next << "Runtime Dynamic = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Runtime Dynamic = " << router.crossbar.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << endl; - cout << endl; - cout << indent_str << indent_str << "Arbiter:" << endl; - cout << indent_str << indent_str_next << "Peak Dynamic = " + << " W" << std::endl; + std::cout << std::endl; + std::cout << indent_str << indent_str << "Arbiter:" << std::endl; + std::cout << indent_str << indent_str_next << "Peak Dynamic = " << router.arbiter.power.readOp.dynamic * nocdynp.clockRate * nocdynp.min_ports * M - << " W" << endl; - cout << indent_str << indent_str_next << "Subthreshold Leakage = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " << (long_channel ? router.arbiter.power.readOp.longer_channel_leakage : router.arbiter.power.readOp.leakage) - << " W" << endl; + << " W" << std::endl; if (power_gating) - cout << indent_str << indent_str_next + std::cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
router.arbiter.power.readOp .power_gated_with_long_channel_leakage : router.arbiter.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << indent_str_next + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Gate Leakage = " << router.arbiter.power.readOp.gate_leakage - << " W" << endl; - cout << indent_str << indent_str_next << "Runtime Dynamic = " + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Runtime Dynamic = " << router.arbiter.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << endl; - cout << endl; + << " W" << std::endl; + std::cout << std::endl; } } if (link_bus_exist) { - cout << indent_str << (nocdynp.type ? "Per Router " : "") << link_name - << ": " << endl; - cout << indent_str_next + std::cout << indent_str << (nocdynp.type ? "Per Router " : "") << link_name + << ": " << std::endl; + std::cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " + << " mm^2" << std::endl; + std::cout << indent_str_next << "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic * nocdynp.clockRate - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " + << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " << (long_channel ? link_bus_tot_per_Router.power.readOp.longer_channel_leakage : link_bus_tot_per_Router.power.readOp.leakage) - << " W" << endl; + << " W" << std::endl; if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " + std::cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
link_bus_tot_per_Router.power.readOp .power_gated_with_long_channel_leakage : link_bus_tot_per_Router.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next << "Gate Leakage = " - << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " + << " W" << std::endl; + std::cout << indent_str_next << "Gate Leakage = " + << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " << link_bus.rt_power.readOp.dynamic / nocdynp.executionTime << " W" - << endl; - cout << endl; + << std::endl; + std::cout << std::endl; } } else { - // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = - //" - //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " - // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << - // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << - // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next - //<< "Load Store Unit Peak Dynamic = " << - // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout - // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << - // lsu->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "Load Store Unit Gate Leakage = " << - // lsu->rt_power.readOp.gate_leakage - //<< " W" << endl; cout << indent_str_next << "Memory Management Unit - // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << - // endl; cout << indent_str_next << "Memory Management Unit Subthreshold - // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout - // << indent_str_next << "Memory Management Unit Gate Leakage = " << - // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << - // indent_str_next << "Execution Unit Peak Dynamic = " << - // exu->rt_power.readOp.dynamic*clockRate << " W" 
<< endl; cout - // << indent_str_next << "Execution Unit Subthreshold Leakage = " << - // exu->rt_power.readOp.leakage << " W" << endl; cout << - // indent_str_next - // << "Execution Unit Gate Leakage = " << - // exu->rt_power.readOp.gate_leakage - //<< " W" << endl; } } diff --git a/src/noc.h b/src/noc.h index 18907cb..4e75dcd 100644 --- a/src/noc.h +++ b/src/noc.h @@ -72,7 +72,7 @@ class NoC : public Component { void computePower(); void computeRuntimeDynamicPower(); void init_link_bus(double link_len_); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + void display(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); // TODO void computeEnergy_link_bus(bool is_tdp = true); void displayEnergy_link_bus(uint32_t indent = 0, diff --git a/src/processor.cc b/src/processor.cc index 0725c11..601b4a1 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -45,13 +45,18 @@ #include #include -Processor::Processor(ParseXML *XML_interface) - : XML(XML_interface) { // TODO: using one global copy may have problems. +Processor::Processor() { + +} + +void Processor::init(const ParseXML *XML) { + // TODO: using one global copy may have problems. /* - * placement and routing overhead is 10%, core scales worse than cache 40% is + * placement and routing overhead is 10%, core scales worse than cache 40% is * accumulated from 90 to 22nm There is no point to have heterogeneous memory * controller on chip, thus McPAT only support homogeneous memory controllers. 
*/ + this->XML = XML; int i; double pppm_t[4] = {1, 1, 1, 1}; set_proc_param(); @@ -971,7 +976,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } for (i = 0; i < numNOC; i++) { - nocs[i]->displayEnergy(indent + 4, plevel, is_tdp); + nocs[i]->display(indent + 4, plevel, is_tdp); cout << "**************************************************************" "***************************" << endl; @@ -981,6 +986,18 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } +void Processor::computeArea() { + +} + +void Processor::computePower() { + +} + +void Processor::computeRuntimeDynamicPower() { + +} + void Processor::set_proc_param() { bool debug = false; diff --git a/src/processor.h b/src/processor.h index 416d4d4..82ddc93 100644 --- a/src/processor.h +++ b/src/processor.h @@ -51,7 +51,16 @@ class Processor : public Component { public: - ParseXML *XML; + Processor(); + void init(const ParseXML* XML); + void computeArea(); + void computePower(); + void computeRuntimeDynamicPower(); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + ~Processor(); + +private: + const ParseXML *XML; vector cores; vector l2array; vector l3array; @@ -64,18 +73,28 @@ class Processor : public Component { FlashController flashcontroller; InputParameter interface_ip; ProcParam procdynp; - // wire globalInterconnect; - // clock_network globalClock; - Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies, - flashcontrollers; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface); - void compute(); + + // Used for total Area Calcs: + Component core; + Component l2; + Component l3; + Component l1dir; + Component l2dir; + Component noc; + Component mcs; + Component cc; + Component nius; + Component pcies; + Component flashcontrollers; + int numCore; + int numL2; + int numL3; + int numNOC; + int numL1Dir; + int numL2Dir; void set_proc_param(); - void 
displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); void displayDeviceType(int device_type_, uint32_t indent = 0); void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); - ~Processor(); }; #endif /* PROCESSOR_H_ */ From 32f4264df652140cfdf47d6a5323ba8f348607d5 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 21 Jun 2020 11:50:02 -0500 Subject: [PATCH 34/59] refoactor: Processor Using an init funtion that optionally can skip ComputeArea calculations. --- src/main.cc | 11 +- src/noc.cc | 229 +++++----- src/processor.cc | 1058 ++++++++++++++++++++++++---------------------- src/processor.h | 15 +- 4 files changed, 688 insertions(+), 625 deletions(-) diff --git a/src/main.cc b/src/main.cc index 986113b..1f9b9dc 100644 --- a/src/main.cc +++ b/src/main.cc @@ -54,11 +54,16 @@ int main(int argc, char *argv[]) { // parse XML-based interface ParseXML *p1 = new ParseXML(); p1->parse(opt.input_xml); + // if(!opt.serialization_restore) { Processor proc; proc.init(p1); - proc.computeArea(); - proc.computePower(); - proc.computeRuntimeDynamicPower(); + // save(proc, opt.serialization_path+"mp_checkpoint.txt"); + //} + // else { + // Processor proc; + // restore(proc, opt.serialization_path+"mp_checkpoint.txt"); + // proc.init(p1, true); + //} proc.displayEnergy(2, opt.print_level); delete p1; return 0; diff --git a/src/noc.cc b/src/noc.cc index 5383b37..ea23754 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -299,165 +299,178 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { if (is_tdp) { std::cout << name << std::endl; std::cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << std::endl; + << std::endl; std::cout << indent_str - << "Peak Dynamic = " << power.readOp.dynamic * nocdynp.clockRate - << " W" << std::endl; + << "Peak Dynamic = " << power.readOp.dynamic * nocdynp.clockRate + << " W" << std::endl; std::cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? 
power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; if (power_gating) { std::cout << indent_str << "Subthreshold Leakage with power gating = " - << (power.readOp.power_gated_leakage * - (long_channel - ? power.readOp.longer_channel_leakage / power.readOp.leakage - : 1)) - << " W" << std::endl; + << (power.readOp.power_gated_leakage * + (long_channel ? power.readOp.longer_channel_leakage / + power.readOp.leakage + : 1)) + << " W" << std::endl; } - std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << std::endl; + std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << std::endl; std::cout << indent_str << "Runtime Dynamic = " - << rt_power.readOp.dynamic / nocdynp.executionTime << " W" << std::endl; + << rt_power.readOp.dynamic / nocdynp.executionTime << " W" + << std::endl; std::cout << std::endl; if (router_exist) { std::cout << indent_str << "Router: " << std::endl; std::cout << indent_str_next << "Area = " << router.area.get_area() * 1e-6 - << " mm^2" << std::endl; + << " mm^2" << std::endl; std::cout << indent_str_next << "Peak Dynamic = " - << router.power.readOp.dynamic * nocdynp.clockRate << " W" << std::endl; + << router.power.readOp.dynamic * nocdynp.clockRate << " W" + << std::endl; std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? router.power.readOp.longer_channel_leakage - : router.power.readOp.leakage) - << " W" << std::endl; + << (long_channel ? router.power.readOp.longer_channel_leakage + : router.power.readOp.leakage) + << " W" << std::endl; if (power_gating) { - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
router.power.readOp.power_gated_with_long_channel_leakage - : router.power.readOp.power_gated_leakage) - << " W" << std::endl; + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? router.power.readOp.power_gated_with_long_channel_leakage + : router.power.readOp.power_gated_leakage) + << " W" << std::endl; } std::cout << indent_str_next - << "Gate Leakage = " << router.power.readOp.gate_leakage << " W" - << std::endl; + << "Gate Leakage = " << router.power.readOp.gate_leakage << " W" + << std::endl; std::cout << indent_str_next << "Runtime Dynamic = " - << router.rt_power.readOp.dynamic / nocdynp.executionTime << " W" - << std::endl; + << router.rt_power.readOp.dynamic / nocdynp.executionTime + << " W" << std::endl; std::cout << std::endl; if (plevel > 2) { - std::cout << indent_str << indent_str << "Virtual Channel Buffer:" << std::endl; + std::cout << indent_str << indent_str + << "Virtual Channel Buffer:" << std::endl; std::cout << indent_str << indent_str_next << "Area = " - << router.buffer.area.get_area() * 1e-6 * nocdynp.input_ports - << " mm^2" << std::endl; + << router.buffer.area.get_area() * 1e-6 * nocdynp.input_ports + << " mm^2" << std::endl; std::cout << indent_str << indent_str_next << "Peak Dynamic = " - << (router.buffer.power.readOp.dynamic + - router.buffer.power.writeOp.dynamic) * - nocdynp.min_ports * M * nocdynp.clockRate - << " W" << std::endl; + << (router.buffer.power.readOp.dynamic + + router.buffer.power.writeOp.dynamic) * + nocdynp.min_ports * M * nocdynp.clockRate + << " W" << std::endl; std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? router.buffer.power.readOp.longer_channel_leakage * - nocdynp.input_ports - : router.buffer.power.readOp.leakage * nocdynp.input_ports) - << " W" << std::endl; + << (long_channel + ? 
router.buffer.power.readOp.longer_channel_leakage * + nocdynp.input_ports + : router.buffer.power.readOp.leakage * + nocdynp.input_ports) + << " W" << std::endl; if (power_gating) { std::cout << indent_str << indent_str_next - << "Subthreshold Leakage with power gating = " - << (long_channel - ? router.buffer.power.readOp - .power_gated_with_long_channel_leakage - : router.buffer.power.readOp.power_gated_leakage) - << " W" << std::endl; + << "Subthreshold Leakage with power gating = " + << (long_channel + ? router.buffer.power.readOp + .power_gated_with_long_channel_leakage + : router.buffer.power.readOp.power_gated_leakage) + << " W" << std::endl; } std::cout << indent_str << indent_str_next << "Gate Leakage = " - << router.buffer.power.readOp.gate_leakage * nocdynp.input_ports - << " W" << std::endl; + << router.buffer.power.readOp.gate_leakage * + nocdynp.input_ports + << " W" << std::endl; std::cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router.buffer.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << std::endl; + << router.buffer.rt_power.readOp.dynamic / + nocdynp.executionTime + << " W" << std::endl; std::cout << std::endl; std::cout << indent_str << indent_str << "Crossbar:" << std::endl; std::cout << indent_str << indent_str_next - << "Area = " << router.crossbar.area.get_area() * 1e-6 << " mm^2" - << std::endl; + << "Area = " << router.crossbar.area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str << indent_str_next << "Peak Dynamic = " - << router.crossbar.power.readOp.dynamic * nocdynp.clockRate * - nocdynp.min_ports * M - << " W" << std::endl; + << router.crossbar.power.readOp.dynamic * nocdynp.clockRate * + nocdynp.min_ports * M + << " W" << std::endl; std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? router.crossbar.power.readOp.longer_channel_leakage - : router.crossbar.power.readOp.leakage) - << " W" << std::endl; + << (long_channel + ? 
router.crossbar.power.readOp.longer_channel_leakage + : router.crossbar.power.readOp.leakage) + << " W" << std::endl; if (power_gating) std::cout << indent_str << indent_str_next - << "Subthreshold Leakage with power gating = " - << (long_channel - ? router.crossbar.power.readOp - .power_gated_with_long_channel_leakage - : router.crossbar.power.readOp.power_gated_leakage) - << " W" << std::endl; - std::cout << indent_str << indent_str_next - << "Gate Leakage = " << router.crossbar.power.readOp.gate_leakage - << " W" << std::endl; + << "Subthreshold Leakage with power gating = " + << (long_channel + ? router.crossbar.power.readOp + .power_gated_with_long_channel_leakage + : router.crossbar.power.readOp.power_gated_leakage) + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Gate Leakage = " + << router.crossbar.power.readOp.gate_leakage << " W" + << std::endl; std::cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router.crossbar.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << std::endl; + << router.crossbar.rt_power.readOp.dynamic / + nocdynp.executionTime + << " W" << std::endl; std::cout << std::endl; std::cout << indent_str << indent_str << "Arbiter:" << std::endl; std::cout << indent_str << indent_str_next << "Peak Dynamic = " - << router.arbiter.power.readOp.dynamic * nocdynp.clockRate * - nocdynp.min_ports * M - << " W" << std::endl; + << router.arbiter.power.readOp.dynamic * nocdynp.clockRate * + nocdynp.min_ports * M + << " W" << std::endl; std::cout << indent_str << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? router.arbiter.power.readOp.longer_channel_leakage - : router.arbiter.power.readOp.leakage) - << " W" << std::endl; + << (long_channel + ? 
router.arbiter.power.readOp.longer_channel_leakage + : router.arbiter.power.readOp.leakage) + << " W" << std::endl; if (power_gating) std::cout << indent_str << indent_str_next - << "Subthreshold Leakage with power gating = " - << (long_channel - ? router.arbiter.power.readOp - .power_gated_with_long_channel_leakage - : router.arbiter.power.readOp.power_gated_leakage) - << " W" << std::endl; - std::cout << indent_str << indent_str_next - << "Gate Leakage = " << router.arbiter.power.readOp.gate_leakage - << " W" << std::endl; + << "Subthreshold Leakage with power gating = " + << (long_channel + ? router.arbiter.power.readOp + .power_gated_with_long_channel_leakage + : router.arbiter.power.readOp.power_gated_leakage) + << " W" << std::endl; + std::cout << indent_str << indent_str_next << "Gate Leakage = " + << router.arbiter.power.readOp.gate_leakage << " W" + << std::endl; std::cout << indent_str << indent_str_next << "Runtime Dynamic = " - << router.arbiter.rt_power.readOp.dynamic / nocdynp.executionTime - << " W" << std::endl; + << router.arbiter.rt_power.readOp.dynamic / + nocdynp.executionTime + << " W" << std::endl; std::cout << std::endl; } } if (link_bus_exist) { - std::cout << indent_str << (nocdynp.type ? "Per Router " : "") << link_name - << ": " << std::endl; + std::cout << indent_str << (nocdynp.type ? "Per Router " : "") + << link_name << ": " << std::endl; std::cout << indent_str_next - << "Area = " << link_bus_tot_per_Router.area.get_area() * 1e-6 - << " mm^2" << std::endl; + << "Area = " << link_bus_tot_per_Router.area.get_area() * 1e-6 + << " mm^2" << std::endl; std::cout << indent_str_next << "Peak Dynamic = " - << link_bus_tot_per_Router.power.readOp.dynamic * nocdynp.clockRate - << " W" << std::endl; - std::cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? 
link_bus_tot_per_Router.power.readOp.longer_channel_leakage - : link_bus_tot_per_Router.power.readOp.leakage) - << " W" << std::endl; + << link_bus_tot_per_Router.power.readOp.dynamic * + nocdynp.clockRate + << " W" << std::endl; + std::cout + << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? link_bus_tot_per_Router.power.readOp.longer_channel_leakage + : link_bus_tot_per_Router.power.readOp.leakage) + << " W" << std::endl; if (power_gating) - std::cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? link_bus_tot_per_Router.power.readOp - .power_gated_with_long_channel_leakage - : link_bus_tot_per_Router.power.readOp.power_gated_leakage) - << " W" << std::endl; + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? link_bus_tot_per_Router.power.readOp + .power_gated_with_long_channel_leakage + : link_bus_tot_per_Router.power.readOp.power_gated_leakage) + << " W" << std::endl; std::cout << indent_str_next << "Gate Leakage = " - << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << std::endl; + << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" + << std::endl; std::cout << indent_str_next << "Runtime Dynamic = " - << link_bus.rt_power.readOp.dynamic / nocdynp.executionTime << " W" - << std::endl; + << link_bus.rt_power.readOp.dynamic / nocdynp.executionTime + << " W" << std::endl; std::cout << std::endl; } } else { diff --git a/src/processor.cc b/src/processor.cc index 601b4a1..71b0903 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -45,11 +45,9 @@ #include #include -Processor::Processor() { +Processor::Processor() {} -} - -void Processor::init(const ParseXML *XML) { +void Processor::init(const ParseXML *XML, bool cp) { // TODO: using one global copy may have problems. 
/* * placement and routing overhead is 10%, core scales worse than cache 40% is @@ -60,30 +58,36 @@ void Processor::init(const ParseXML *XML) { int i; double pppm_t[4] = {1, 1, 1, 1}; set_proc_param(); - if (procdynp.homoCore) + if (procdynp.homoCore) { numCore = procdynp.numCore == 0 ? 0 : 1; - else + } else { numCore = procdynp.numCore; + } - if (procdynp.homoL2) + if (procdynp.homoL2) { numL2 = procdynp.numL2 == 0 ? 0 : 1; - else + } else { numL2 = procdynp.numL2; + } if (XML->sys.Private_L2 && numCore != numL2) { - cout << "Number of private L2 does not match number of cores" << endl; - exit(0); + std::cerr << "[ Processor ] Error:Number of private L2 does not match " + "number of cores" + << std::endl; + exit(1); } - if (procdynp.homoL3) + if (procdynp.homoL3) { numL3 = procdynp.numL3 == 0 ? 0 : 1; - else + } else { numL3 = procdynp.numL3; + } - if (procdynp.homoNOC) + if (procdynp.homoNOC) { numNOC = procdynp.numNOC == 0 ? 0 : 1; - else + } else { numNOC = procdynp.numNOC; + } // if (!procdynp.homoNOC) // { @@ -91,15 +95,17 @@ void Processor::init(const ParseXML *XML) { // exit(0); // } - if (procdynp.homoL1Dir) + if (procdynp.homoL1Dir) { numL1Dir = procdynp.numL1Dir == 0 ? 0 : 1; - else + } else { numL1Dir = procdynp.numL1Dir; + } - if (procdynp.homoL2Dir) + if (procdynp.homoL2Dir) { numL2Dir = procdynp.numL2Dir == 0 ? 
0 : 1; - else + } else { numL2Dir = procdynp.numL2Dir; + } for (i = 0; i < numCore; i++) { cores.push_back(new Core(XML, i, &interface_ip)); @@ -147,27 +153,31 @@ void Processor::init(const ParseXML *XML) { if (!XML->sys.Private_L2) { if (numL2 > 0) { for (i = 0; i < numL2; i++) { - l2array.push_back(new SharedCache()); - l2array[i]->set_params(XML, i, &interface_ip); - l2array[i]->set_stats(XML); - l2array[i]->computeArea(); - l2array[i]->computeStaticPower(true); - l2array[i]->computeStaticPower(); + for (!cp) { + l2array.push_back(SharedCache()); + } + l2array[i].set_params(XML, i, &interface_ip); + l2array[i].set_stats(XML); + for (!cp) { + l2array[i].computeArea(); + } + l2array[i].computeStaticPower(true); + l2array[i].computeStaticPower(); if (procdynp.homoL2) { l2.area.set_area(l2.area.get_area() + - l2array[i]->area.get_area() * procdynp.numL2); + l2array[i].area.get_area() * procdynp.numL2); set_pppm(pppm_t, - l2array[i]->cachep.clockRate * procdynp.numL2, + l2array[i].cachep.clockRate * procdynp.numL2, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.power = l2.power + l2array[i]->power * pppm_t; + l2.power = l2.power + l2array[i].power * pppm_t; set_pppm(pppm_t, - 1 / l2array[i]->cachep.executionTime, + 1 / l2array[i].cachep.executionTime, procdynp.numL2, procdynp.numL2, procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; + l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; area.set_area( area.get_area() + l2.area.get_area()); // placement and routing overhead is 10%, l2 @@ -176,21 +186,20 @@ void Processor::init(const ParseXML *XML) { power = power + l2.power; rt_power = rt_power + l2.rt_power; } else { - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); + l2.area.set_area(l2.area.get_area() + l2array[i].area.get_area()); area.set_area( area.get_area() + - l2array[i] - ->area.get_area()); // placement and routing overhead is - // 10%, l2 scales worse than cache - // 40% is accumulated from 90 to 
22nm + l2array[i].area.get_area()); // placement and routing overhead is + // 10%, l2 scales worse than cache + // 40% is accumulated from 90 to 22nm - set_pppm(pppm_t, l2array[i]->cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i]->power * pppm_t; - power = power + l2array[i]->power * pppm_t; + set_pppm(pppm_t, l2array[i].cachep.clockRate, 1, 1, 1); + l2.power = l2.power + l2array[i].power * pppm_t; + power = power + l2array[i].power * pppm_t; ; - set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t; - rt_power = rt_power + l2array[i]->rt_power * pppm_t; + set_pppm(pppm_t, 1 / l2array[i].cachep.executionTime, 1, 1, 1); + l2.rt_power = l2.rt_power + l2array[i].rt_power * pppm_t; + rt_power = rt_power + l2array[i].rt_power * pppm_t; } } } @@ -198,27 +207,31 @@ void Processor::init(const ParseXML *XML) { if (numL3 > 0) { for (i = 0; i < numL3; i++) { - l3array.push_back(new SharedCache()); - l3array[i]->set_params(XML, i, &interface_ip, L3); - l3array[i]->set_stats(XML); - l3array[i]->computeArea(); - l3array[i]->computeStaticPower(true); - l3array[i]->computeStaticPower(); + for (!cp) { + l3array.push_back(SharedCache()); + } + l3array[i].set_params(XML, i, &interface_ip, L3); + l3array[i].set_stats(XML); + for (!cp) { + l3array[i].computeArea(); + } + l3array[i].computeStaticPower(true); + l3array[i].computeStaticPower(); if (procdynp.homoL3) { l3.area.set_area(l3.area.get_area() + - l3array[i]->area.get_area() * procdynp.numL3); + l3array[i].area.get_area() * procdynp.numL3); set_pppm(pppm_t, - l3array[i]->cachep.clockRate * procdynp.numL3, + l3array[i].cachep.clockRate * procdynp.numL3, procdynp.numL3, procdynp.numL3, procdynp.numL3); - l3.power = l3.power + l3array[i]->power * pppm_t; + l3.power = l3.power + l3array[i].power * pppm_t; set_pppm(pppm_t, - 1 / l3array[i]->cachep.executionTime, + 1 / l3array[i].cachep.executionTime, procdynp.numL3, procdynp.numL3, procdynp.numL3); - 
l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; + l3.rt_power = l3.rt_power + l3array[i].rt_power * pppm_t; area.set_area(area.get_area() + l3.area.get_area()); // placement and routing overhead is // 10%, l3 scales worse than cache @@ -227,44 +240,48 @@ void Processor::init(const ParseXML *XML) { rt_power = rt_power + l3.rt_power; } else { - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); + l3.area.set_area(l3.area.get_area() + l3array[i].area.get_area()); area.set_area( area.get_area() + - l3array[i]->area.get_area()); // placement and routing overhead is - // 10%, l3 scales worse than cache 40% - // is accumulated from 90 to 22nm - set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i]->power * pppm_t; - power = power + l3array[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1); - l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t; - rt_power = rt_power + l3array[i]->rt_power * pppm_t; + l3array[i].area.get_area()); // placement and routing overhead is + // 10%, l3 scales worse than cache 40% + // is accumulated from 90 to 22nm + set_pppm(pppm_t, l3array[i].cachep.clockRate, 1, 1, 1); + l3.power = l3.power + l3array[i].power * pppm_t; + power = power + l3array[i].power * pppm_t; + set_pppm(pppm_t, 1 / l3array[i].cachep.executionTime, 1, 1, 1); + l3.rt_power = l3.rt_power + l3array[i].rt_power * pppm_t; + rt_power = rt_power + l3array[i].rt_power * pppm_t; } } } if (numL1Dir > 0) { for (i = 0; i < numL1Dir; i++) { - l1dirarray.push_back(new SharedCache()); - l1dirarray[i]->set_params(XML, i, &interface_ip, L1Directory); - l1dirarray[i]->set_stats(XML); - l1dirarray[i]->computeArea(); - l1dirarray[i]->computeStaticPower(true); - l1dirarray[i]->computeStaticPower(); + for (!cp) { + l1dirarray.push_back(SharedCache()); + } + l1dirarray[i].set_params(XML, i, &interface_ip, L1Directory); + l1dirarray[i].set_stats(XML); + for (!cp) { + 
l1dirarray[i].computeArea(); + } + l1dirarray[i].computeStaticPower(true); + l1dirarray[i].computeStaticPower(); if (procdynp.homoL1Dir) { l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i]->area.get_area() * procdynp.numL1Dir); + l1dirarray[i].area.get_area() * procdynp.numL1Dir); set_pppm(pppm_t, - l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, + l1dirarray[i].cachep.clockRate * procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; + l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; set_pppm(pppm_t, - 1 / l1dirarray[i]->cachep.executionTime, + 1 / l1dirarray[i].cachep.executionTime, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; + l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; area.set_area( area.get_area() + l1dir.area.get_area()); // placement and routing overhead is 10%, @@ -275,40 +292,44 @@ void Processor::init(const ParseXML *XML) { } else { l1dir.area.set_area(l1dir.area.get_area() + - l1dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); - set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1); - l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t; - power = power + l1dirarray[i]->power; - set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t; - rt_power = rt_power + l1dirarray[i]->rt_power; + l1dirarray[i].area.get_area()); + area.set_area(area.get_area() + l1dirarray[i].area.get_area()); + set_pppm(pppm_t, l1dirarray[i].cachep.clockRate, 1, 1, 1); + l1dir.power = l1dir.power + l1dirarray[i].power * pppm_t; + power = power + l1dirarray[i].power; + set_pppm(pppm_t, 1 / l1dirarray[i].cachep.executionTime, 1, 1, 1); + l1dir.rt_power = l1dir.rt_power + l1dirarray[i].rt_power * pppm_t; + rt_power = rt_power + 
l1dirarray[i].rt_power; } } } - if (numL2Dir > 0) + if (numL2Dir > 0) { for (i = 0; i < numL2Dir; i++) { - l2dirarray.push_back(new SharedCache()); - l2dirarray[i]->set_params(XML, i, &interface_ip, L2Directory); - l2dirarray[i]->set_stats(XML); - l2dirarray[i]->computeArea(); - l2dirarray[i]->computeStaticPower(true); - l2dirarray[i]->computeStaticPower(); + for (!cp) { + l2dirarray.push_back(SharedCache()); + } + l2dirarray[i].set_params(XML, i, &interface_ip, L2Directory); + l2dirarray[i].set_stats(XML); + for (!cp) { + l2dirarray[i].computeArea(); + } + l2dirarray[i].computeStaticPower(true); + l2dirarray[i].computeStaticPower(); if (procdynp.homoL2Dir) { l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i]->area.get_area() * procdynp.numL2Dir); + l2dirarray[i].area.get_area() * procdynp.numL2Dir); set_pppm(pppm_t, - l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, + l2dirarray[i].cachep.clockRate * procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; + l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; set_pppm(pppm_t, - 1 / l2dirarray[i]->cachep.executionTime, + 1 / l2dirarray[i].cachep.executionTime, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; + l2dir.rt_power = l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; area.set_area( area.get_area() + l2dir.area.get_area()); // placement and routing overhead is 10%, @@ -319,20 +340,24 @@ void Processor::init(const ParseXML *XML) { } else { l2dir.area.set_area(l2dir.area.get_area() + - l2dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); - set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t; - power = power + l2dirarray[i]->power * pppm_t; - set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1); - 
l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t; - rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t; + l2dirarray[i].area.get_area()); + area.set_area(area.get_area() + l2dirarray[i].area.get_area()); + set_pppm(pppm_t, l2dirarray[i].cachep.clockRate, 1, 1, 1); + l2dir.power = l2dir.power + l2dirarray[i].power * pppm_t; + power = power + l2dirarray[i].power * pppm_t; + set_pppm(pppm_t, 1 / l2dirarray[i].cachep.executionTime, 1, 1, 1); + l2dir.rt_power = l2dir.rt_power + l2dirarray[i].rt_power * pppm_t; + rt_power = rt_power + l2dirarray[i].rt_power * pppm_t; } } + } + // Memory Controllers: if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { mc.set_params(XML, &interface_ip, MC); - mc.computeArea(); + if (!cp) { + mc.computeArea(); + } mcs.area.set_area(mcs.area.get_area() + mc.area.get_area() * XML->sys.mc.number_mcs); area.set_area(area.get_area() + @@ -357,11 +382,13 @@ void Processor::init(const ParseXML *XML) { rt_power = rt_power + mcs.rt_power; } - if (XML->sys.flashc.number_mcs > 0) // flash controller - { + // Flash Controller: + if (XML->sys.flashc.number_mcs > 0) { flashcontroller.set_params(XML, &interface_ip); flashcontroller.set_stats(XML); - flashcontroller.computeArea(); + if (!cp) { + flashcontroller.computeArea(); + } flashcontroller.computeStaticPower(); flashcontroller.computeDynamicPower(); double number_fcs = flashcontroller.fcp.num_mcs; @@ -377,9 +404,12 @@ void Processor::init(const ParseXML *XML) { rt_power = rt_power + flashcontrollers.rt_power; } + // Network Interface Unit: if (XML->sys.niu.number_units > 0) { niu.set_params(XML, &interface_ip); - niu.computeArea(); + if (!cp) { + niu.computeArea(); + } niu.computeStaticPower(); nius.area.set_area(nius.area.get_area() + niu.area.get_area() * XML->sys.niu.number_units); @@ -403,9 +433,12 @@ void Processor::init(const ParseXML *XML) { rt_power = rt_power + nius.rt_power; } + // PCIe Controller: if (XML->sys.pcie.number_units > 0 && 
XML->sys.pcie.num_channels > 0) { pcie.set_params(XML, &interface_ip); - pcie.computeArea(); + if (!cp) { + pcie.computeArea(); + } pcies.area.set_area(pcies.area.get_area() + pcie.area.get_area() * XML->sys.pcie.number_units); area.set_area(area.get_area() + @@ -430,38 +463,47 @@ void Processor::init(const ParseXML *XML) { rt_power = rt_power + pcies.rt_power; } + // Network(s) on Chip: if (numNOC > 0) { for (i = 0; i < numNOC; i++) { if (XML->sys.NoC[i].type) { // First add up area of routers if NoC is used - nocs.push_back(new NoC()); - nocs[i]->set_params(XML, i, &interface_ip, 1); - nocs[i]->set_stats(XML); - nocs[i]->computeArea(); + if (!cp) { + nocs.push_back(NoC()); + } + nocs[i].set_params(XML, i, &interface_ip, 1); + nocs[i].set_stats(XML); + if (!cp) { + nocs[i].computeArea(); + } if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); + nocs[i].area.get_area() * procdynp.numNOC); area.set_area(area.get_area() + noc.area.get_area()); } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); + noc.area.set_area(noc.area.get_area() + nocs[i].area.get_area()); + area.set_area(area.get_area() + nocs[i].area.get_area()); } } else { // Bus based interconnect - nocs.push_back(new NoC()); - nocs[i]->set_params( + if (!cp) { + nocs.push_back(NoC()); + } + nocs[i].set_params( XML, i, &interface_ip, 1, sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); - nocs[i]->set_stats(XML); - nocs[i]->computeArea(); + nocs[i].set_stats(XML); + if (!cp) { + nocs[i].computeArea(); + } if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + - nocs[i]->area.get_area() * procdynp.numNOC); + nocs[i].area.get_area() * procdynp.numNOC); area.set_area(area.get_area() + noc.area.get_area()); } else { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); + 
noc.area.set_area(noc.area.get_area() + nocs[i].area.get_area()); + area.set_area(area.get_area() + nocs[i].area.get_area()); } } } @@ -472,64 +514,56 @@ void Processor::init(const ParseXML *XML) { * must be obtain to decide the link routing */ for (i = 0; i < numNOC; i++) { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) { - nocs[i]->init_link_bus( + if (nocs[i].nocdynp.has_global_link && XML->sys.NoC[i].type) { + nocs[i].init_link_bus( sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); // compute global links if (procdynp.homoNOC) { noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes * procdynp.numNOC); area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes * procdynp.numNOC); + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes * procdynp.numNOC); } else { noc.area.set_area(noc.area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes); area.set_area(area.get_area() + - nocs[i]->link_bus_tot_per_Router.area.get_area() * - nocs[i]->nocdynp.total_nodes); + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes); } } } // Compute energy of NoC (w or w/o links) or buses for (i = 0; i < numNOC; i++) { - nocs[i]->computePower(); - nocs[i]->computeRuntimeDynamicPower(); + nocs[i].computePower(); + nocs[i].computeRuntimeDynamicPower(); if (procdynp.homoNOC) { set_pppm(pppm_t, - procdynp.numNOC * nocs[i]->nocdynp.clockRate, + procdynp.numNOC * nocs[i].nocdynp.clockRate, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC); - noc.power = noc.power + nocs[i]->power * pppm_t; + noc.power = noc.power + nocs[i].power * pppm_t; set_pppm(pppm_t, - 1 
/ nocs[i]->nocdynp.executionTime, + 1 / nocs[i].nocdynp.executionTime, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; + noc.rt_power = noc.rt_power + nocs[i].rt_power * pppm_t; power = power + noc.power; rt_power = rt_power + noc.rt_power; } else { - set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power * pppm_t; - power = power + nocs[i]->power * pppm_t; - set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t; - rt_power = rt_power + nocs[i]->rt_power * pppm_t; + set_pppm(pppm_t, nocs[i].nocdynp.clockRate, 1, 1, 1); + noc.power = noc.power + nocs[i].power * pppm_t; + power = power + nocs[i].power * pppm_t; + set_pppm(pppm_t, 1 / nocs[i].nocdynp.executionTime, 1, 1, 1); + noc.rt_power = noc.rt_power + nocs[i].rt_power * pppm_t; + rt_power = rt_power + nocs[i].rt_power * pppm_t; } } } - - // //clock power - // globalClock.init_wire_external(is_default, &interface_ip); - // globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 - // globalClock.end_wiring_level =5;//toplevel metal - // globalClock.start_wiring_level =5;//toplevel metal - // globalClock.l_ip.with_clock_grid=false;//global clock does not drive local - // final nodes globalClock.optimize_wire(); } void Processor::displayDeviceType(int device_type_, uint32_t indent) { @@ -592,412 +626,446 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { string indent_str(indent, ' '); string indent_str_next(indent + 2, ' '); if (is_tdp) { - if (plevel < 5) { - cout + std::cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR << " of " << VER_UPDATE << ") results (current print level is " << plevel << ", please increase print level to see the details in components): " - << endl; + << std::endl; } else { - cout << "\nMcPAT (version " << VER_MAJOR << "." 
<< VER_MINOR << " of " - << VER_UPDATE << ") results (current print level is 5)" << endl; + std::cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE + << ") results (current print level is 5)" << std::endl; } - cout << "******************************************************************" - "***********************" - << endl; - cout << indent_str << "Technology " << XML->sys.core_tech_node << " nm" - << endl; - // cout <sys.interconnect_projection_type<sys.core_tech_node << " nm" + << std::endl; + // std::cout <sys.interconnect_projection_type<sys.interconnect_projection_type, indent); - cout << indent_str << "Core clock Rate(MHz) " << XML->sys.core[0].clock_rate - << endl; - cout << endl; - cout << "******************************************************************" - "***********************" - << endl; - cout << "Processor: " << endl; - cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str << "Peak Power = " - << power.readOp.dynamic + - (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) + - power.readOp.gate_leakage - << " W" << endl; - cout << indent_str << "Total Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) + - power.readOp.gate_leakage - << " W" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" - << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel ? power.readOp.longer_channel_leakage - : power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str << "Subthreshold Leakage with power gating = " - << (long_channel ? 
power.readOp.power_gated_with_long_channel_leakage - : power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" - << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic - << " W" << endl; - cout << endl; + std::cout << indent_str << "Core clock Rate(MHz) " + << XML->sys.core[0].clock_rate << std::endl; + std::cout << std::endl; + std::cout + << "******************************************************************" + "***********************" + << std::endl; + std::cout << "Processor: " << std::endl; + std::cout << indent_str << "Area = " << area.get_area() * 1e-6 << " mm^2" + << std::endl; + std::cout << indent_str << "Peak Power = " + << power.readOp.dynamic + + (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + + power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str << "Total Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + + power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str << "Subthreshold Leakage = " + << (long_channel ? power.readOp.longer_channel_leakage + : power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
power.readOp.power_gated_with_long_channel_leakage + : power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage + << " W" << std::endl; + std::cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic + << " W" << std::endl; + std::cout << std::endl; if (numCore > 0) { - cout << indent_str << "Total Cores: " << XML->sys.number_of_cores - << " cores " << endl; + std::cout << indent_str << "Total Cores: " << XML->sys.number_of_cores + << " cores " << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next << "Area = " << core.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? core.power.readOp.longer_channel_leakage - : core.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? core.power.readOp.power_gated_with_long_channel_leakage - : core.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << core.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << core.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? core.power.readOp.longer_channel_leakage + : core.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
core.power.readOp.power_gated_with_long_channel_leakage + : core.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (!XML->sys.Private_L2) { if (numL2 > 0) { - cout << indent_str << "Total L2s: " << endl; + std::cout << indent_str << "Total L2s: " << std::endl; displayDeviceType(XML->sys.L2[0].device_type, indent); - cout << indent_str_next << "Area = " << l2.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? l2.power.readOp.longer_channel_leakage - : l2.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? l2.power.readOp.power_gated_with_long_channel_leakage - : l2.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << l2.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l2.power.readOp.longer_channel_leakage + : l2.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l2.power.readOp.power_gated_with_long_channel_leakage + : l2.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } } if (numL3 > 0) { - cout << indent_str << "Total L3s: " << endl; + std::cout << indent_str << "Total L3s: " << std::endl; displayDeviceType(XML->sys.L3[0].device_type, indent); - cout << indent_str_next << "Area = " << l3.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? l3.power.readOp.longer_channel_leakage - : l3.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? l3.power.readOp.power_gated_with_long_channel_leakage - : l3.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << l3.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l3.power.readOp.longer_channel_leakage + : l3.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l3.power.readOp.power_gated_with_long_channel_leakage + : l3.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (numL1Dir > 0) { - cout << indent_str << "Total First Level Directory: " << endl; + std::cout << indent_str << "Total First Level Directory: " << std::endl; displayDeviceType(XML->sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l1dir.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? l1dir.power.readOp.longer_channel_leakage - : l1dir.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? l1dir.power.readOp.power_gated_with_long_channel_leakage - : l1dir.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << l1dir.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l1dir.power.readOp.longer_channel_leakage + : l1dir.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l1dir.power.readOp.power_gated_with_long_channel_leakage + : l1dir.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (numL2Dir > 0) { - cout << indent_str << "Total Second Level Directory: " << endl; + std::cout << indent_str << "Total Second Level Directory: " << std::endl; displayDeviceType(XML->sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l2dir.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? l2dir.power.readOp.longer_channel_leakage - : l2dir.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? l2dir.power.readOp.power_gated_with_long_channel_leakage - : l2dir.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << l2dir.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? l2dir.power.readOp.longer_channel_leakage + : l2dir.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
l2dir.power.readOp.power_gated_with_long_channel_leakage + : l2dir.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (numNOC > 0) { - cout << indent_str << "Total NoCs (Network/Bus): " << endl; + std::cout << indent_str << "Total NoCs (Network/Bus): " << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next << "Area = " << noc.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? noc.power.readOp.longer_channel_leakage - : noc.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? noc.power.readOp.power_gated_with_long_channel_leakage - : noc.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << noc.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? noc.power.readOp.longer_channel_leakage + : noc.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
noc.power.readOp.power_gated_with_long_channel_leakage + : noc.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { - cout << indent_str << "Total MCs: " << XML->sys.mc.number_mcs - << " Memory Controllers " << endl; + std::cout << indent_str << "Total MCs: " << XML->sys.mc.number_mcs + << " Memory Controllers " << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next << "Area = " << mcs.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? mcs.power.readOp.longer_channel_leakage - : mcs.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? mcs.power.readOp.power_gated_with_long_channel_leakage - : mcs.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << mcs.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
mcs.power.readOp.longer_channel_leakage + : mcs.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? mcs.power.readOp.power_gated_with_long_channel_leakage + : mcs.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (XML->sys.flashc.number_mcs > 0) { - cout << indent_str - << "Total Flash/SSD Controllers: " << flashcontroller.fcp.num_mcs - << " Flash/SSD Controllers " << endl; + std::cout << indent_str << "Total Flash/SSD Controllers: " + << flashcontroller.fcp.num_mcs << " Flash/SSD Controllers " + << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next - << "Area = " << flashcontrollers.area.get_area() * 1e-6 << " mm^2" - << endl; - cout << indent_str_next - << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" - << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel - ? flashcontrollers.power.readOp.longer_channel_leakage - : flashcontrollers.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? 
flashcontrollers.power.readOp - .power_gated_with_long_channel_leakage - : flashcontrollers.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage - << " W" << endl; - cout << indent_str_next - << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic - << " W" << endl; - cout << endl; + std::cout << indent_str_next + << "Area = " << flashcontrollers.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic + << " W" << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel + ? flashcontrollers.power.readOp.longer_channel_leakage + : flashcontrollers.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout << indent_str_next + << "Subthreshold Leakage with power gating = " + << (long_channel + ? flashcontrollers.power.readOp + .power_gated_with_long_channel_leakage + : flashcontrollers.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next << "Gate Leakage = " + << flashcontrollers.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next << "Runtime Dynamic = " + << flashcontrollers.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (XML->sys.niu.number_units > 0) { - cout << indent_str << "Total NIUs: " << niu.niup.num_units - << " Network Interface Units " << endl; + std::cout << indent_str << "Total NIUs: " << niu.niup.num_units + << " Network Interface Units " << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next << "Area = " << nius.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? 
nius.power.readOp.longer_channel_leakage - : nius.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? nius.power.readOp.power_gated_with_long_channel_leakage - : nius.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << nius.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? nius.power.readOp.longer_channel_leakage + : nius.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
nius.power.readOp.power_gated_with_long_channel_leakage + : nius.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { - cout << indent_str << "Total PCIes: " << pcie.pciep.num_units - << " PCIe Controllers " << endl; + std::cout << indent_str << "Total PCIes: " << pcie.pciep.num_units + << " PCIe Controllers " << std::endl; displayDeviceType(XML->sys.device_type, indent); - cout << indent_str_next << "Area = " << pcies.area.get_area() * 1e-6 - << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic - << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? pcies.power.readOp.longer_channel_leakage - : pcies.power.readOp.leakage) - << " W" << endl; - if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel - ? pcies.power.readOp.power_gated_with_long_channel_leakage - : pcies.power.readOp.power_gated_leakage) - << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" - << endl; - cout << indent_str_next - << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" - << endl; - cout << endl; + std::cout << indent_str_next << "Area = " << pcies.area.get_area() * 1e-6 + << " mm^2" << std::endl; + std::cout << indent_str_next + << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" + << std::endl; + std::cout << indent_str_next << "Subthreshold Leakage = " + << (long_channel ? 
pcies.power.readOp.longer_channel_leakage + : pcies.power.readOp.leakage) + << " W" << std::endl; + if (power_gating) { + std::cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? pcies.power.readOp.power_gated_with_long_channel_leakage + : pcies.power.readOp.power_gated_leakage) + << " W" << std::endl; + } + std::cout << indent_str_next + << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" + << std::endl; + std::cout << indent_str_next + << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" + << std::endl; + std::cout << std::endl; } - cout << "******************************************************************" - "***********************" - << endl; + std::cout + << "******************************************************************" + "***********************" + << std::endl; if (plevel > 1) { for (i = 0; i < numCore; i++) { cores[i]->displayEnergy(indent + 4, plevel, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + std::cout + << "**************************************************************" + "***************************" + << std::endl; } if (!XML->sys.Private_L2) { for (i = 0; i < numL2; i++) { - l2array[i]->display(indent + 4, is_tdp); - cout << "************************************************************" - "*****************************" - << endl; + l2array[i].display(indent + 4, is_tdp); + std::cout + << "************************************************************" + "*****************************" + << std::endl; } } for (i = 0; i < numL3; i++) { - l3array[i]->display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + l3array[i].display(indent + 4, is_tdp); + std::cout + << "**************************************************************" + "***************************" + << std::endl; } for (i = 0; i < numL1Dir; i++) 
{ - l1dirarray[i]->display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + l1dirarray[i].display(indent + 4, is_tdp); + std::cout + << "**************************************************************" + "***************************" + << std::endl; } for (i = 0; i < numL2Dir; i++) { - l2dirarray[i]->display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + l2dirarray[i].display(indent + 4, is_tdp); + std::cout + << "**************************************************************" + "***************************" + << std::endl; } if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) { mc.display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + std::cout + << "**************************************************************" + "***************************" + << std::endl; } if (XML->sys.flashc.number_mcs > 0 && XML->sys.flashc.memory_channels_per_mc > 0) { flashcontroller.display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + std::cout + << "**************************************************************" + "***************************" + << std::endl; } if (XML->sys.niu.number_units > 0) { niu.display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + std::cout + << "**************************************************************" + "***************************" + << std::endl; } if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) { pcie.display(indent + 4, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + 
std::cout + << "**************************************************************" + "***************************" + << std::endl; } for (i = 0; i < numNOC; i++) { - nocs[i]->display(indent + 4, plevel, is_tdp); - cout << "**************************************************************" - "***************************" - << endl; + nocs[i].display(indent + 4, plevel, is_tdp); + std::cout + << "**************************************************************" + "***************************" + << std::endl; } } } else { } } -void Processor::computeArea() { - -} - -void Processor::computePower() { - -} - -void Processor::computeRuntimeDynamicPower() { - -} - void Processor::set_proc_param() { bool debug = false; @@ -1019,15 +1087,15 @@ void Processor::set_proc_param() { // if (procdynp.numCore<1) // { - // cout<<" The target processor should at least have one core on + // std::cout<<" The target processor should at least have one core on // chip." - //<2) // { - // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global - // and local NOCs)"< cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; - vector nocs; + vector l2array; + vector l3array; + vector l1dirarray; + vector l2dirarray; + vector nocs; MemoryController mc; NIUController niu; PCIeController pcie; From 8b980a3179a197380c269d7f30cae0369222cc7a Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 21 Jun 2020 16:26:01 -0500 Subject: [PATCH 35/59] serialization: Debugging double counting In the L1/L2 dirarray there is double counting going on when restoring from checkpoint --- CMakeLists.txt | 3 ++- src/CMakeLists.txt | 3 ++- src/array.h | 21 ++++++++++++++++++ src/basic_components.h | 24 +++++++++++++++++++++ src/cache/sharedcache.h | 17 +++++++++++++++ src/cacti/area.h | 15 +++++++++++++ src/cacti/cacti_interface.h | 27 +++++++++++++++++++++++ src/cacti/component.h | 17 +++++++++++++++ src/interconnect.h | 11 ++++++++++ src/iocontrollers/flash_controller.h | 13 +++++++++++ 
src/iocontrollers/niu_controller.h | 13 +++++++++++ src/iocontrollers/pcie_controller.h | 13 +++++++++++ src/logic/functional_unit.h | 17 +++++++++++++++ src/main.cc | 32 ++++++++++++++++++++-------- src/memoryctrl/mc_backend.h | 16 ++++++++++++++ src/memoryctrl/mc_frontend.h | 11 ++++++++++ src/memoryctrl/mc_phy.h | 16 ++++++++++++++ src/memoryctrl/memoryctrl.h | 15 +++++++++++++ src/noc.h | 17 +++++++++++++++ src/options.cc | 5 +++-- src/options.h | 1 + src/processor.cc | 16 +++++++------- src/processor.h | 22 +++++++++++++++++++ 23 files changed, 324 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6c26ab..fd4f0c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,8 @@ project(mcpat DESCRIPTION "Power Timing Area Calculator" LANGUAGES CXX) find_package(Boost 1.56 REQUIRED COMPONENTS - program_options) + program_options + serialization) set(THREADS_PREFER_PTHREAD_FLAG ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1fd185e..b52bed8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,7 +42,8 @@ target_link_libraries(mcpat LINK_PUBLIC top Threads::Threads - Boost::program_options) + Boost::program_options + Boost::serialization) add_custom_command(TARGET mcpat POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ ${PROJECT_BINARY_DIR}/mcpat diff --git a/src/array.h b/src/array.h index 1d5b621..20c14f3 100644 --- a/src/array.h +++ b/src/array.h @@ -38,6 +38,10 @@ #include "const.h" #include "parameter.h" +#include +#include +#include +#include #include #include @@ -79,6 +83,23 @@ class ArrayST : public Component { virtual void optimize_array(); virtual void compute_base_power(); void leakage_feedback(double temperature); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &name; + ar &device_ty; + ar &opt_local; + ar &core_ty; + ar &is_default; + ar &tdp_stats; + ar &rtp_stats; + ar &stats_t; + ar &power_t; + 
Component::serialize(ar, version); + } }; #endif /* __ARRAY_H__ */ diff --git a/src/basic_components.h b/src/basic_components.h index de58c86..e873101 100644 --- a/src/basic_components.h +++ b/src/basic_components.h @@ -35,6 +35,10 @@ #include "XML_Parse.h" #include "parameter.h" +#include +#include +#include +#include #include const double cdb_overhead = 1.1; @@ -88,6 +92,16 @@ class statsComponents { const statsComponents &y); friend statsComponents operator*(const statsComponents &x, double const *const y); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &access; + ar &hit; + ar &miss; + } }; class statsDef { @@ -105,6 +119,16 @@ class statsDef { friend statsDef operator+(const statsDef &x, const statsDef &y); friend statsDef operator*(const statsDef &x, double const *const y); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &readAc; + ar &writeAc; + ar &searchAc; + } }; double longer_channel_device_reduction(enum Device_ty device_ty = Core_device, diff --git a/src/cache/sharedcache.h b/src/cache/sharedcache.h index 7b15011..84e5692 100644 --- a/src/cache/sharedcache.h +++ b/src/cache/sharedcache.h @@ -40,6 +40,10 @@ #include "datacache.h" #include "parameter.h" +#include +#include +#include +#include #include class SharedCache : public Component { @@ -86,6 +90,19 @@ class SharedCache : public Component { enum Device_ty device_t; enum Core_type core_t; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &unicache; + ar &homenode_tdp_stats; + ar &homenode_rtp_stats; + ar &homenode_stats_t; + ar &set_area; + Component::serialize(ar, version); + } }; #endif /* SHAREDCACHE_H_ */ diff --git a/src/cacti/area.h b/src/cacti/area.h index 7c1080d..18ad502 100644 --- a/src/cacti/area.h +++ 
b/src/cacti/area.h @@ -35,6 +35,11 @@ #include "basic_circuit.h" #include "cacti_interface.h" +#include +#include +#include +#include + using namespace std; class Area { @@ -58,6 +63,16 @@ class Area { private: double area; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &w; + ar &h; + ar &area; + } }; #endif diff --git a/src/cacti/cacti_interface.h b/src/cacti/cacti_interface.h index b1d1664..fa9f15e 100644 --- a/src/cacti/cacti_interface.h +++ b/src/cacti/cacti_interface.h @@ -34,6 +34,10 @@ #include "const.h" +#include +#include +#include +#include #include #include #include @@ -86,6 +90,19 @@ class powerComponents { const powerComponents &y); friend powerComponents operator*(const powerComponents &x, double const *const y); + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &dynamic; + ar &leakage; + ar &gate_leakage; + ar &short_circuit; + ar &longer_channel_leakage; + ar &power_gated_leakage; + ar &power_gated_with_long_channel_leakage; + } }; class powerDef { @@ -103,6 +120,16 @@ class powerDef { friend powerDef operator+(const powerDef &x, const powerDef &y); friend powerDef operator*(const powerDef &x, double const *const y); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &readOp; + ar &writeOp; + ar &searchOp; + } }; enum Wire_type { diff --git a/src/cacti/component.h b/src/cacti/component.h index 636a432..9f36351 100644 --- a/src/cacti/component.h +++ b/src/cacti/component.h @@ -35,6 +35,11 @@ #include "area.h" #include "parameter.h" +#include +#include +#include +#include + using namespace std; class Crossbar; @@ -72,6 +77,18 @@ class Component { bool is_wl_tr_, double max_w_nmos); + // Serialization + friend class boost::serialization::access; + + template + void 
serialize(Archive &ar, const unsigned int version) { + ar &delay; + ar &cycle_time; + ar &power; + ar &rt_power; + ar &area; + } + private: double compute_diffusion_width(int num_stacked_in, int num_folded_tr); }; diff --git a/src/interconnect.h b/src/interconnect.h index e5ae11c..f31b348 100644 --- a/src/interconnect.h +++ b/src/interconnect.h @@ -103,6 +103,17 @@ class interconnect : public Component { void compute(); TechnologyParameter::DeviceType *deviceType; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power_bit; + ar &max_unpipelined_link_delay; + ar &no_device_under_wire_area; + Component::serialize(ar, version); + } }; #endif diff --git a/src/iocontrollers/flash_controller.h b/src/iocontrollers/flash_controller.h index cfb4138..f13ed0e 100644 --- a/src/iocontrollers/flash_controller.h +++ b/src/iocontrollers/flash_controller.h @@ -38,6 +38,10 @@ #include "basic_components.h" #include "parameter.h" +#include +#include +#include +#include #include class FlashController : public Component { @@ -66,6 +70,15 @@ class FlashController : public Component { double SerDer_gates; double NMOS_sizing; double PMOS_sizing; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power_t; + Component::serialize(ar, version); + } }; #endif // __FLASHCONTROLLER_H__ diff --git a/src/iocontrollers/niu_controller.h b/src/iocontrollers/niu_controller.h index f0d6bc1..125a445 100644 --- a/src/iocontrollers/niu_controller.h +++ b/src/iocontrollers/niu_controller.h @@ -38,6 +38,10 @@ #include "basic_components.h" #include "parameter.h" +#include +#include +#include +#include #include class NIUController : public Component { @@ -60,6 +64,15 @@ class NIUController : public Component { bool power_gating; bool init_params; bool init_stats; + + // Serialization + friend class 
boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power_t; + Component::serialize(ar, version); + } }; #endif // __NIU_CONTROLLER_H__ diff --git a/src/iocontrollers/pcie_controller.h b/src/iocontrollers/pcie_controller.h index eacda70..2add4ce 100644 --- a/src/iocontrollers/pcie_controller.h +++ b/src/iocontrollers/pcie_controller.h @@ -38,6 +38,10 @@ #include "basic_components.h" #include "parameter.h" +#include +#include +#include +#include #include class PCIeController : public Component { @@ -60,6 +64,15 @@ class PCIeController : public Component { bool power_gating; bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power_t; + Component::serialize(ar, version); + } }; #endif // __PCIE_CONTROLLER_H__ diff --git a/src/logic/functional_unit.h b/src/logic/functional_unit.h index 13eeef9..fc7eb74 100644 --- a/src/logic/functional_unit.h +++ b/src/logic/functional_unit.h @@ -42,6 +42,10 @@ #include "parameter.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -99,6 +103,19 @@ class FunctionalUnit : public Component { // Private Methods: void computeLeakage(); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power_t; + ar &stats_t; + ar &tdp_stats; + ar &rtp_stats; + ar &area_t; + Component::serialize(ar, version); + } }; #endif // __FUNCTIONAL_UNIT_H__ diff --git a/src/main.cc b/src/main.cc index 1f9b9dc..8ab3e42 100644 --- a/src/main.cc +++ b/src/main.cc @@ -36,8 +36,25 @@ #include "version.h" #include "xmlParser.h" +#include +#include +#include #include +void save(const Processor &s, std::string name) { + // Make an archive + std::ofstream ofs(name.c_str()); + boost::archive::text_oarchive oa(ofs); + oa << s; +} + +void restore(Processor &s, 
std::string name) { + // Restore from the Archive + std::ifstream ifs(name.c_str()); + boost::archive::text_iarchive ia(ifs); + ia >> s; +} + using namespace std; int main(int argc, char *argv[]) { @@ -53,17 +70,14 @@ int main(int argc, char *argv[]) { // parse XML-based interface ParseXML *p1 = new ParseXML(); - p1->parse(opt.input_xml); - // if(!opt.serialization_restore) { Processor proc; + Processor proc2; + p1->parse(opt.input_xml); proc.init(p1); - // save(proc, opt.serialization_path+"mp_checkpoint.txt"); - //} - // else { - // Processor proc; - // restore(proc, opt.serialization_path+"mp_checkpoint.txt"); - // proc.init(p1, true); - //} + // save(proc, opt.serialization_name); + // restore(proc2, opt.serialization_name); + // proc2.init(p1, true); + // proc2.displayEnergy(2, opt.print_level); proc.displayEnergy(2, opt.print_level); delete p1; return 0; diff --git a/src/memoryctrl/mc_backend.h b/src/memoryctrl/mc_backend.h index 5986174..85daee2 100644 --- a/src/memoryctrl/mc_backend.h +++ b/src/memoryctrl/mc_backend.h @@ -37,6 +37,10 @@ #include "basic_components.h" #include "parameter.h" +#include +#include +#include +#include #include class MCBackend : public Component { @@ -66,6 +70,18 @@ class MCBackend : public Component { bool power_gating; bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &tdp_stats; + ar &rtp_stats; + ar &stats_t; + ar &power_t; + Component::serialize(ar, version); + } }; #endif // __MC_BACKEND_H__ diff --git a/src/memoryctrl/mc_frontend.h b/src/memoryctrl/mc_frontend.h index c0547c0..c0e39c9 100644 --- a/src/memoryctrl/mc_frontend.h +++ b/src/memoryctrl/mc_frontend.h @@ -83,6 +83,17 @@ class MCFrontEnd : public Component { void computeFrontEndTDP(); void computeReadBufferTDP(); void computeWriteBufferTDP(); + + // Serialization + friend class boost::serialization::access; + + template + void 
serialize(Archive &ar, const unsigned int version) { + ar &frontendBuffer; + ar &readBuffer; + ar &writeBuffer; + Component::serialize(ar, version); + } }; #endif // __MC_FRONTEND_H__ diff --git a/src/memoryctrl/mc_phy.h b/src/memoryctrl/mc_phy.h index a34d116..53c12fc 100644 --- a/src/memoryctrl/mc_phy.h +++ b/src/memoryctrl/mc_phy.h @@ -37,6 +37,10 @@ #include "basic_components.h" #include "parameter.h" +#include +#include +#include +#include #include class MCPHY : public Component { @@ -66,6 +70,18 @@ class MCPHY : public Component { bool power_gating; bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &tdp_stats; + ar &rtp_stats; + ar &stats_t; + ar &power_t; + Component::serialize(ar, version); + } }; #endif // __MC_PHY_H__ diff --git a/src/memoryctrl/memoryctrl.h b/src/memoryctrl/memoryctrl.h index ad6dfe6..d18b3d4 100644 --- a/src/memoryctrl/memoryctrl.h +++ b/src/memoryctrl/memoryctrl.h @@ -40,6 +40,10 @@ #include "mc_phy.h" #include "parameter.h" +#include +#include +#include +#include #include class MemoryController : public Component { @@ -71,5 +75,16 @@ class MemoryController : public Component { bool set_area; void set_mc_param(const ParseXML *XML); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &frontend; + ar &transecEngine; + ar &PHY; + Component::serialize(ar, version); + } }; #endif /* __MEMORYCTRL_H__ */ diff --git a/src/noc.h b/src/noc.h index 4e75dcd..1b975dd 100644 --- a/src/noc.h +++ b/src/noc.h @@ -39,6 +39,11 @@ #include "parameter.h" #include "router.h" +#include +#include +#include +#include + class NoC : public Component { public: int ithNoC; @@ -92,6 +97,18 @@ class NoC : public Component { void set_noc_param(const ParseXML *XML); void init_router(); + + // Serialization + friend class boost::serialization::access; 
+ + template + void serialize(Archive &ar, const unsigned int version) { + ar &name; + ar &link_name; + ar &router; + ar &link_bus; + Component::serialize(ar, version); + } }; #endif /* NOC_H_ */ diff --git a/src/options.cc b/src/options.cc index a8ca396..6d30dde 100644 --- a/src/options.cc +++ b/src/options.cc @@ -53,8 +53,9 @@ bool mcpat::Options::parse(int argc, char **argv) { po::options_description serialization("Serialization Options"); serialization.add_options() ("serial_path", po::value(&serialization_path), "Path/to/serialization") - ("serial_create", po::value(&serialization_create)->default_value(true), "Create A Serialization Checkpoint") - ("serial_restore", po::value(&serialization_restore)->default_value(true), "Restore from a Serialization Checkpoint") + ("serial_name", po::value(&serialization_name)->default_value("mcpat_cp.txt"), "file name to serialize to") + ("serial_create", po::value(&serialization_create)->default_value(false), "Create A Serialization Checkpoint") + ("serial_restore", po::value(&serialization_restore)->default_value(false), "Restore from a Serialization Checkpoint") ; po::options_description optimization("Optimization Options"); diff --git a/src/options.h b/src/options.h index 486fd20..55e903a 100644 --- a/src/options.h +++ b/src/options.h @@ -54,6 +54,7 @@ class Options { // Serialization Options std::string serialization_path = ""; + std::string serialization_name = ""; bool serialization_create = false; bool serialization_restore = false; diff --git a/src/processor.cc b/src/processor.cc index 71b0903..461ec0d 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -153,12 +153,12 @@ void Processor::init(const ParseXML *XML, bool cp) { if (!XML->sys.Private_L2) { if (numL2 > 0) { for (i = 0; i < numL2; i++) { - for (!cp) { + if (!cp) { l2array.push_back(SharedCache()); } l2array[i].set_params(XML, i, &interface_ip); l2array[i].set_stats(XML); - for (!cp) { + if (!cp) { l2array[i].computeArea(); } 
l2array[i].computeStaticPower(true); @@ -207,12 +207,12 @@ void Processor::init(const ParseXML *XML, bool cp) { if (numL3 > 0) { for (i = 0; i < numL3; i++) { - for (!cp) { + if (!cp) { l3array.push_back(SharedCache()); } l3array[i].set_params(XML, i, &interface_ip, L3); l3array[i].set_stats(XML); - for (!cp) { + if (!cp) { l3array[i].computeArea(); } l3array[i].computeStaticPower(true); @@ -257,12 +257,12 @@ void Processor::init(const ParseXML *XML, bool cp) { } if (numL1Dir > 0) { for (i = 0; i < numL1Dir; i++) { - for (!cp) { + if (!cp) { l1dirarray.push_back(SharedCache()); } l1dirarray[i].set_params(XML, i, &interface_ip, L1Directory); l1dirarray[i].set_stats(XML); - for (!cp) { + if (!cp) { l1dirarray[i].computeArea(); } l1dirarray[i].computeStaticPower(true); @@ -305,12 +305,12 @@ void Processor::init(const ParseXML *XML, bool cp) { } if (numL2Dir > 0) { for (i = 0; i < numL2Dir; i++) { - for (!cp) { + if (!cp) { l2dirarray.push_back(SharedCache()); } l2dirarray[i].set_params(XML, i, &interface_ip, L2Directory); l2dirarray[i].set_stats(XML); - for (!cp) { + if (!cp) { l2dirarray[i].computeArea(); } l2dirarray[i].computeStaticPower(true); diff --git a/src/processor.h b/src/processor.h index 6056730..12044a2 100644 --- a/src/processor.h +++ b/src/processor.h @@ -47,6 +47,11 @@ #include "router.h" #include "sharedcache.h" +#include +#include +#include +#include +#include #include class Processor : public Component { @@ -92,6 +97,23 @@ class Processor : public Component { void set_proc_param(); void displayDeviceType(int device_type_, uint32_t indent = 0); void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &l2array; + ar &l3array; + ar &l1dirarray; + ar &l2dirarray; + ar &nocs; + ar &mc; + ar &niu; + ar &pcie; + ar &flashcontroller; + Component::serialize(ar, version); + } }; #endif /* 
PROCESSOR_H_ */ From 2c73fa8747fab145edc0282a928c0d2dbca3d5c4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Sun, 21 Jun 2020 21:39:13 -0500 Subject: [PATCH 36/59] Selection_logic Dep_resource & EXECU --- src/core/core.cc | 62 +-- src/core/core.h | 2 +- src/core/exec_unit.cc | 608 +++++++++++++++++++---- src/core/exec_unit.h | 18 +- src/core/renaming_unit.cc | 144 +++--- src/core/renaming_unit.h | 4 +- src/core/scheduler.cc | 32 +- src/core/scheduler.h | 2 +- src/logic/dep_resource_conflict_check.cc | 36 ++ src/logic/dep_resource_conflict_check.h | 7 + src/logic/selection_logic.cc | 31 ++ src/logic/selection_logic.h | 9 +- 12 files changed, 726 insertions(+), 229 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index f4ad7b1..2a4115b 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -48,7 +48,7 @@ Core::Core(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - ifu(0), lsu(0), mmu(0), exu(0), rnu(0), corepipe(0), undiffCore(0), + ifu(0), lsu(0), mmu(0), rnu(0), corepipe(0), undiffCore(0), l2cache(0) { /* * initialize, compute and optimize individual components. 
@@ -77,8 +77,11 @@ Core::Core(const ParseXML *XML_interface, mmu->set_params(XML, ithCore, &interface_ip, coredynp); mmu->computeArea(); mmu->set_stats(XML); - exu = new EXECU( + exu.set_params( XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); + exu.set_stats(XML); + exu.computeArea(); + exu.computeStaticPower(); undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); if (coredynp.core_ty == OOO) { rnu = new RENAMINGU(); @@ -108,9 +111,9 @@ Core::Core(const ParseXML *XML_interface, lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + lsu->area.get_area()); } - if (exu->exist) { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + exu->area.get_area()); + if (exu.exist) { + exu.area.set_area(exu.area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu.area.get_area()); } if (mmu->exist) { mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); @@ -155,7 +158,7 @@ void Core::computeEnergy(bool is_tdp) { ifu->computeEnergy(is_tdp); lsu->computeEnergy(is_tdp); mmu->computeDynamicPower(is_tdp); - exu->computeEnergy(is_tdp); + exu.computeDynamicPower(is_tdp); if (coredynp.core_ty == OOO) { num_units = 5.0; @@ -203,16 +206,16 @@ void Core::computeEnergy(bool is_tdp) { // cout << "core = " << // power.readOp.dynamic*clockRate << " W" << endl; } - if (exu->exist) { + if (exu.exist) { set_pppm(pppm_t, coredynp.num_pipelines / num_units * coredynp.ALU_duty_cycle, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - exu->power = exu->power + corepipe->power * pppm_t; + exu.power = exu.power + corepipe->power * pppm_t; // cout << "EXE = " << - // exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; + // exu.power.readOp.dynamic*clockRate << " W" << endl; + power = power + exu.power; // cout << "core = " << // 
power.readOp.dynamic*clockRate << " W" << endl; } @@ -245,7 +248,7 @@ void Core::computeEnergy(bool is_tdp) { ifu->computeEnergy(is_tdp); lsu->computeEnergy(is_tdp); mmu->computeDynamicPower(is_tdp); - exu->computeEnergy(is_tdp); + exu.computeDynamicPower(is_tdp); if (coredynp.core_ty == OOO) { num_units = 5.0; @@ -305,7 +308,7 @@ void Core::computeEnergy(bool is_tdp) { lsu->rt_power = lsu->rt_power + corepipe->power * pppm_t; rt_power = rt_power + lsu->rt_power; } - if (exu->exist) { + if (exu.exist) { if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.ALU_duty_cycle * XML->sys.total_cycles * @@ -319,8 +322,8 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - exu->rt_power = exu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + exu->rt_power; + exu.rt_power = exu.rt_power + corepipe->power * pppm_t; + rt_power = rt_power + exu.rt_power; } if (mmu->exist) { if (XML->sys.homogeneous_cores == 1) { @@ -342,7 +345,7 @@ void Core::computeEnergy(bool is_tdp) { } rt_power = rt_power + undiffCore->power; - // cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" + // cout << "EXE = " << exu.power.readOp.dynamic*clockRate << " W" //<< endl; if (XML->sys.Private_L2) { l2cache->computeStaticPower(); @@ -488,28 +491,28 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { mmu->displayEnergy(indent + 4, plevel, is_tdp); } } - if (exu->exist) { + if (exu.exist) { cout << indent_str << "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << exu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << exu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << exu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " 
- << (long_channel ? exu->power.readOp.longer_channel_leakage - : exu->power.readOp.leakage) + << (long_channel ? exu.power.readOp.longer_channel_leakage + : exu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? exu->power.readOp.power_gated_with_long_channel_leakage - : exu->power.readOp.power_gated_leakage) + ? exu.power.readOp.power_gated_with_long_channel_leakage + : exu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << exu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << exu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - exu->displayEnergy(indent + 4, plevel, is_tdp); + exu.displayEnergy(indent + 4, plevel, is_tdp); } } // if (plevel >2) @@ -565,12 +568,12 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // << indent_str_next << "Memory Management Unit Gate Leakage = " << // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next << "Execution Unit Peak Dynamic = " << - // exu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // exu.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Execution Unit Subthreshold Leakage = " << - // exu->rt_power.readOp.leakage << " W" << endl; cout << + // exu.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next // << "Execution Unit Gate Leakage = " << - // exu->rt_power.readOp.gate_leakage + // exu.rt_power.readOp.gate_leakage //<< " W" << endl; } } @@ -593,10 +596,7 @@ Core ::~Core() { delete mmu; mmu = 0; } - if (exu) { - delete exu; - exu = 0; - } + if (corepipe) { delete corepipe; corepipe = 0; diff --git a/src/core/core.h b/src/core/core.h index 0892faf..e47c82c 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -57,7 +57,7 @@ class Core : public Component { InstFetchU *ifu; LoadStoreU *lsu; MemManU *mmu; - EXECU 
*exu; + EXECU exu; RENAMINGU *rnu; Pipeline *corepipe; UndiffCore *undiffCore; diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 9798c7f..e326385 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -42,45 +42,99 @@ #include #include -EXECU::EXECU(const ParseXML *XML_interface, +EXECU::EXECU(){ + init_params = false; + init_stats = false; +} + +void EXECU::set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, double lsq_height_, const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - lsq_height(lsq_height_), coredynp(dyn_p_), rfu(0), scheu(0), - exist(exist_) { + bool exist_){ + + XML = XML_interface; + ithCore = ithCore_; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + lsq_height = lsq_height_; + exist = exist_; bool exist_flag = true; if (!exist) { return; } - double fu_height = 0.0; clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; - rfu = new RegFU(); - rfu->set_params(XML, ithCore, &interface_ip, coredynp); - rfu->computeArea(); - rfu->set_stats(XML); - scheu = new SchedulerU(); - scheu->set_params(XML, ithCore, &interface_ip, coredynp); - scheu->computeArea(); - scheu->set_stats(XML); + rfu.set_params(XML, ithCore, &interface_ip, coredynp); + scheu.set_params(XML, ithCore, &interface_ip, coredynp); exeu.set_params(XML, ithCore, &interface_ip, coredynp, ALU); - exeu.set_stats(XML); - exeu.computeArea(); - area.set_area(area.get_area() + exeu.area.get_area() + rfu->area.get_area() + - scheu->area.get_area()); - fu_height = exeu.FU_height; + + if (coredynp.num_fpus > 0) { fp_u.set_params(XML, ithCore, &interface_ip, coredynp, FPU); + } + if (coredynp.num_muls > 0) { + mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); + + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; + * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. 
on the + * same bus there are multiple tri-state drivers and muxes that go to + * different components on the same bus + */ + + init_params = true; +} + +void EXECU::computeStaticPower(){ + //Doing nothing as of now, everything seems to be hapening inside set area itself +} + +void EXECU::set_stats(const ParseXML *XML){ + rfu.set_stats(XML); + scheu.set_stats(XML); + exeu.set_stats(XML); + if (coredynp.num_fpus > 0) { fp_u.set_stats(XML); + } + if (coredynp.num_muls > 0) { + mul.set_stats(XML); + } + init_stats = true; +} + +// void EXECU::computeArea(){ + +// } + +void EXECU::computeArea(){ + if (!init_params) { + std::cerr << "[ EXECU ] Error: must set params before calling " + "computeArea()\n"; + exit(1); + } + rfu.computeArea(); + + scheu.computeArea(); + + exeu.computeArea(); + + //all of the below interconnects depend ont he stats being set + rfu.set_stats(XML); + scheu.set_stats(XML); + exeu.set_stats(XML); + double fu_height = 0.0; + + area.set_area(area.get_area() + exeu.area.get_area() + rfu.area.get_area() + + scheu.area.get_area()); + fu_height = exeu.FU_height; + if (coredynp.num_fpus > 0) { fp_u.computeArea(); area.set_area(area.get_area() + fp_u.area.get_area()); } if (coredynp.num_muls > 0) { - mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); - mul.set_stats(XML); mul.computeArea(); area.set_area(area.get_area() + mul.area.get_area()); fu_height += mul.FU_height; @@ -112,7 +166,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32), - rfu->int_regfile_height + exeu.FU_height + lsq_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height, &interface_ip, 3, false, @@ -125,8 +179,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu.FU_height + lsq_height + - scheu->Iw_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height + + scheu.Iw_height, &interface_ip, 3, false, @@ -141,7 +195,7 @@ EXECU::EXECU(const ParseXML 
*XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + exeu.FU_height + + rfu.fp_regfile_height + exeu.FU_height + mul.FU_height + lsq_height, &interface_ip, 3, @@ -156,8 +210,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu.FU_height + - mul.FU_height + lsq_height + scheu->Iw_height, + rfu.fp_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + scheu.Iw_height, &interface_ip, 3, false, @@ -174,7 +228,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), - rfu->fp_regfile_height + fp_u.FU_height, + rfu.fp_regfile_height + fp_u.FU_height, &interface_ip, 3, false, @@ -187,8 +241,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->Iw_height, + rfu.fp_regfile_height + fp_u.FU_height + lsq_height + + scheu.Iw_height, &interface_ip, 3, false, @@ -210,7 +264,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + lsq_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height, &interface_ip, 3, false, @@ -223,8 +277,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height + + scheu.Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -240,7 +294,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + + rfu.int_regfile_height + exeu.FU_height + mul.FU_height + lsq_height, &interface_ip, 3, @@ -253,9 +307,9 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + + rfu.int_regfile_height + exeu.FU_height + 
mul.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, + scheu.Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -274,7 +328,7 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u.FU_height, + rfu.fp_regfile_height + fp_u.FU_height, &interface_ip, 3, false, @@ -286,8 +340,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, + rfu.fp_regfile_height + fp_u.FU_height + lsq_height + + scheu.fp_Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -309,8 +363,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height + + scheu.Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -322,8 +376,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, + rfu.int_regfile_height + exeu.FU_height + lsq_height + + scheu.Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -339,9 +393,9 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu.FU_height + - mul.FU_height + lsq_height + scheu->Iw_height + - scheu->ROB_height, + rfu.int_regfile_height + exeu.FU_height + + mul.FU_height + lsq_height + scheu.Iw_height + + scheu.ROB_height, &interface_ip, 3, false, @@ -353,9 +407,9 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu.FU_height + + rfu.int_regfile_height + exeu.FU_height + mul.FU_height + lsq_height + - scheu->Iw_height + scheu->ROB_height, + scheu.Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -374,8 +428,8 @@ 
EXECU::EXECU(const ParseXML *XML_interface, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, + rfu.fp_regfile_height + fp_u.FU_height + lsq_height + + scheu.fp_Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -387,8 +441,8 @@ EXECU::EXECU(const ParseXML *XML_interface, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u.FU_height + lsq_height + - scheu->fp_Iw_height + scheu->ROB_height, + rfu.fp_regfile_height + fp_u.FU_height + lsq_height + + scheu.fp_Iw_height + scheu.ROB_height, &interface_ip, 3, false, @@ -405,19 +459,25 @@ EXECU::EXECU(const ParseXML *XML_interface, area.set_area(area.get_area() + bypass.area.get_area()); } -void EXECU::computeEnergy(bool is_tdp) { + +void EXECU::computeDynamicPower(bool is_tdp) { + if (!init_params) { + std::cerr << "[ EXECU ] Error: must set params before calling " + "computeStaticPower()\n"; + exit(1); + } if (!exist) return; double pppm_t[4] = {1, 1, 1, 1}; - // rfu->power.reset(); - // rfu->rt_power.reset(); - // scheu->power.reset(); - // scheu->rt_power.reset(); + // rfu.power.reset(); + // rfu.rt_power.reset(); + // scheu.power.reset(); + // scheu.rt_power.reset(); // exeu.power.reset(); // exeu.rt_power.reset(); - rfu->computeDynamicPower(is_tdp); - scheu->computeDynamicPower(is_tdp); + rfu.computeDynamicPower(is_tdp); + scheu.computeDynamicPower(is_tdp); if (is_tdp) { exeu.computePower(); } else { @@ -476,7 +536,7 @@ void EXECU::computeEnergy(bool is_tdp) { power = power + fp_u.power; } - power = power + rfu->power + exeu.power + bypass.power + scheu->power; + power = power + rfu.power + exeu.power + bypass.power + scheu.power; } else { set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, @@ -509,8 +569,8 @@ void EXECU::computeEnergy(bool is_tdp) { bypass.rt_power = bypass.rt_power + fpTagBypass.power * pppm_t; rt_power = rt_power + fp_u.rt_power; } - rt_power = rt_power + rfu->rt_power + exeu.rt_power 
+ bypass.rt_power + - scheu->rt_power; + rt_power = rt_power + rfu.rt_power + exeu.rt_power + bypass.rt_power + + scheu.rt_power; } } @@ -526,54 +586,54 @@ void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // bypass->area.get_area() *1e-6 << " mm^2" << endl; if (is_tdp) { cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << rfu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << rfu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << rfu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? rfu->power.readOp.longer_channel_leakage - : rfu->power.readOp.leakage) + << (long_channel ? rfu.power.readOp.longer_channel_leakage + : rfu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? rfu->power.readOp.power_gated_with_long_channel_leakage - : rfu->power.readOp.power_gated_leakage) + ? 
rfu.power.readOp.power_gated_with_long_channel_leakage + : rfu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; + << "Gate Leakage = " << rfu.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next - << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic / executionTime + << "Runtime Dynamic = " << rfu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 3) { - rfu->displayEnergy(indent + 4, is_tdp); + rfu.displayEnergy(indent + 4, is_tdp); } cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << scheu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << scheu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << scheu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? scheu->power.readOp.longer_channel_leakage - : scheu->power.readOp.leakage) + << (long_channel ? scheu.power.readOp.longer_channel_leakage + : scheu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? scheu->power.readOp.power_gated_with_long_channel_leakage - : scheu->power.readOp.power_gated_leakage) + ? 
scheu.power.readOp.power_gated_with_long_channel_leakage + : scheu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << scheu.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << scheu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << scheu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 3) { - scheu->displayEnergy(indent + 4, is_tdp); + scheu.displayEnergy(indent + 4, is_tdp); } exeu.display(indent, is_tdp); if (coredynp.num_fpus > 0) { @@ -607,17 +667,17 @@ void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; } else { cout << indent_str_next << "Register Files Peak Dynamic = " - << rfu->rt_power.readOp.dynamic * clockRate << " W" << endl; + << rfu.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Register Files Subthreshold Leakage = " - << rfu->rt_power.readOp.leakage << " W" << endl; + << rfu.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Register Files Gate Leakage = " - << rfu->rt_power.readOp.gate_leakage << " W" << endl; + << rfu.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Instruction Sheduler Peak Dynamic = " - << scheu->rt_power.readOp.dynamic * clockRate << " W" << endl; + << scheu.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Instruction Sheduler Subthreshold Leakage = " - << scheu->rt_power.readOp.leakage << " W" << endl; + << scheu.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Instruction Sheduler Gate Leakage = " - << scheu->rt_power.readOp.gate_leakage << " W" << endl; + << scheu.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Results Broadcast Bus Peak Dynamic = " << bypass.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next 
<< "Results Broadcast Bus Subthreshold Leakage = " @@ -631,12 +691,372 @@ EXECU ::~EXECU() { if (!exist) { return; } - if (rfu) { - delete rfu; - rfu = 0; - } - if (scheu) { - delete scheu; - scheu = 0; - } } + +// void EXECU::set_params(const ParseXML *XML_interface, +// int ithCore_, +// InputParameter *interface_ip_, +// double lsq_height_, +// const CoreDynParam &dyn_p_, +// bool exist_){ + +// XML = XML_interface; +// ithCore = ithCore_; +// interface_ip = *interface_ip_; +// coredynp = dyn_p_; +// lsq_height = lsq_height_; +// exist = exist_; + +// bool exist_flag = true; +// if (!exist) { +// return; +// } +// double fu_height = 0.0; +// clockRate = coredynp.clockRate; +// executionTime = coredynp.executionTime; +// rfu.set_params(XML, ithCore, &interface_ip, coredynp); +// rfu.computeArea(); +// rfu.set_stats(XML); +// scheu.set_params(XML, ithCore, &interface_ip, coredynp); +// scheu.computeArea(); +// scheu.set_stats(XML); +// exeu.set_params(XML, ithCore, &interface_ip, coredynp, ALU); +// exeu.set_stats(XML); +// exeu.computeArea(); +// area.set_area(area.get_area() + exeu.area.get_area() + rfu.area.get_area() + +// scheu.area.get_area()); +// fu_height = exeu.FU_height; +// if (coredynp.num_fpus > 0) { +// fp_u.set_params(XML, ithCore, &interface_ip, coredynp, FPU); +// fp_u.set_stats(XML); +// fp_u.computeArea(); +// area.set_area(area.get_area() + fp_u.area.get_area()); +// } +// if (coredynp.num_muls > 0) { +// mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); +// mul.set_stats(XML); +// mul.computeArea(); +// area.set_area(area.get_area() + mul.area.get_area()); +// fu_height += mul.FU_height; +// } +// /* +// * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; +// * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. 
on the +// * same bus there are multiple tri-state drivers and muxes that go to +// * different components on the same bus +// */ +// if (XML->sys.Embedded) { +// interface_ip.wt = Global_30; +// interface_ip.wire_is_mat_type = 0; +// interface_ip.wire_os_mat_type = 0; +// interface_ip.throughput = 1.0 / clockRate; +// interface_ip.latency = 1.0 / clockRate; +// } else { +// interface_ip.wt = Global; +// interface_ip.wire_is_mat_type = +// 2; // start from semi-global since local wires are already used +// interface_ip.wire_os_mat_type = 2; +// interface_ip.throughput = 10.0 / clockRate; // Do not care +// interface_ip.latency = 10.0 / clockRate; +// } + +// if (coredynp.core_ty == Inorder) { +// int_bypass.init("Int Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(XML->sys.machine_bits / 32.0) * 32), +// rfu.int_regfile_height + exeu.FU_height + lsq_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); +// intTagBypass.init("Int Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.perThreadState, +// rfu.int_regfile_height + exeu.FU_height + lsq_height + +// scheu.Iw_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + intTagBypass.area.get_area()); + +// if (coredynp.num_muls > 0) { +// int_mul_bypass.init("Mul Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), +// rfu.fp_regfile_height + exeu.FU_height + +// mul.FU_height + lsq_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// int_mul_bypass.area.get_area()); +// intTag_mul_Bypass.init("Mul Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.perThreadState, +// rfu.fp_regfile_height + exeu.FU_height + +// mul.FU_height + 
lsq_height + scheu.Iw_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// intTag_mul_Bypass.area.get_area()); +// } + +// if (coredynp.num_fpus > 0) { +// fp_bypass.init("FP Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), +// rfu.fp_regfile_height + fp_u.FU_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + fp_bypass.area.get_area()); +// fpTagBypass.init("FP Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.perThreadState, +// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + +// scheu.Iw_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// fpTagBypass.area.get_area()); +// } +// } else { // OOO +// if (coredynp.scheu_ty == PhysicalRegFile) { +// /* For physical register based OOO, +// * data broadcast interconnects cover across functional units, lsq, inst +// * windows and register files, while tag broadcast interconnects also +// * cover across ROB +// */ +// int_bypass.init("Int Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.int_data_width)), +// rfu.int_regfile_height + exeu.FU_height + lsq_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); +// intTagBypass.init("Int Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_ireg_width, +// rfu.int_regfile_height + exeu.FU_height + lsq_height + +// scheu.Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// intTagBypass.area.get_area()); + +// if (coredynp.num_muls > 0) { +// 
int_mul_bypass.init("Mul Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.int_data_width)), +// rfu.int_regfile_height + exeu.FU_height + +// mul.FU_height + lsq_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// intTag_mul_Bypass.init("Mul Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_ireg_width, +// rfu.int_regfile_height + exeu.FU_height + +// mul.FU_height + lsq_height + +// scheu.Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// int_mul_bypass.area.get_area()); +// bypass.area.set_area(bypass.area.get_area() + +// intTag_mul_Bypass.area.get_area()); +// } + +// if (coredynp.num_fpus > 0) { +// fp_bypass.init("FP Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.fp_data_width)), +// rfu.fp_regfile_height + fp_u.FU_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// fpTagBypass.init("FP Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_freg_width, +// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + +// scheu.fp_Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// fp_bypass.area.get_area()); +// bypass.area.set_area(bypass.area.get_area() + +// fpTagBypass.area.get_area()); +// } +// } else { +// /* +// * In RS based processor both data and tag are broadcast together, +// * covering functional units, lsq, nst windows, register files, and ROBs +// */ +// int_bypass.init("Int Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.int_data_width)), +// rfu.int_regfile_height + exeu.FU_height + lsq_height + +// scheu.Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); 
+// intTagBypass.init("Int Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_ireg_width, +// rfu.int_regfile_height + exeu.FU_height + lsq_height + +// scheu.Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); +// bypass.area.set_area(bypass.area.get_area() + +// intTagBypass.area.get_area()); +// if (coredynp.num_muls > 0) { +// int_mul_bypass.init("Mul Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.int_data_width)), +// rfu.int_regfile_height + exeu.FU_height + +// mul.FU_height + lsq_height + scheu.Iw_height + +// scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// intTag_mul_Bypass.init("Mul Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_ireg_width, +// rfu.int_regfile_height + exeu.FU_height + +// mul.FU_height + lsq_height + +// scheu.Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// int_mul_bypass.area.get_area()); +// bypass.area.set_area(bypass.area.get_area() + +// intTag_mul_Bypass.area.get_area()); +// } + +// if (coredynp.num_fpus > 0) { +// fp_bypass.init("FP Bypass Data", +// Core_device, +// 1, +// 1, +// int(ceil(coredynp.fp_data_width)), +// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + +// scheu.fp_Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// fpTagBypass.init("FP Bypass tag", +// Core_device, +// 1, +// 1, +// coredynp.phy_freg_width, +// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + +// scheu.fp_Iw_height + scheu.ROB_height, +// &interface_ip, +// 3, +// false, +// 1.0, +// coredynp.opt_local, +// coredynp.core_ty); +// bypass.area.set_area(bypass.area.get_area() + +// 
fp_bypass.area.get_area()); +// bypass.area.set_area(bypass.area.get_area() + +// fpTagBypass.area.get_area()); +// } +// } +// } +// area.set_area(area.get_area() + bypass.area.get_area()); + +// init_params = true; +// } \ No newline at end of file diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index 5334ff5..b35c9b2 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -53,8 +53,8 @@ class EXECU : public Component { double macro_PR_overhead; double lsq_height; CoreDynParam coredynp; - RegFU *rfu; - SchedulerU *scheu; + RegFU rfu; + SchedulerU scheu; FunctionalUnit fp_u; FunctionalUnit exeu; FunctionalUnit mul; @@ -68,15 +68,25 @@ class EXECU : public Component { Component bypass; bool exist; - EXECU(const ParseXML *XML_interface, + EXECU(); + + void set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, double lsq_height_, const CoreDynParam &dyn_p_, bool exist_ = true); - void computeEnergy(bool is_tdp = true); + void set_stats(const ParseXML *XML); + void computeArea(); + void computeStaticPower(); + void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~EXECU(); + + private: + bool init_params; + bool init_stats; + }; #endif // __EXEC_U_H__ diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index fc500d6..b3dfe24 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -391,11 +391,11 @@ void RENAMINGU::set_params(const ParseXML *XML, coredynp.opt_local, coredynp.core_ty); - idcl = new dep_resource_conflict_check( + idcl.set_params( &interface_ip, coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( + fdcl.set_params( &interface_ip, coredynp, coredynp.phy_freg_width); } else if (coredynp.scheu_ty == ReservationStation) { @@ -635,11 +635,11 @@ void RENAMINGU::set_params(const ParseXML *XML, // ifreeL.area.set_area(ifreeL.area.get_area()+ // 
ifreeL.local_result.area*XML->sys.core[ithCore].number_hardware_threads); - idcl = new dep_resource_conflict_check( + idcl.set_params( &interface_ip, coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( + fdcl.set_params( &interface_ip, coredynp, coredynp.phy_freg_width); } } @@ -648,11 +648,11 @@ void RENAMINGU::set_params(const ParseXML *XML, * Multiple issue in order processor can do without renaming, but dcl is a * must. */ - idcl = new dep_resource_conflict_check( + idcl.set_params( &interface_ip, coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check( + fdcl.set_params( &interface_ip, coredynp, coredynp.phy_freg_width); } init_params = true; @@ -881,16 +881,16 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { coredynp.decodeW; // ifreeL.l_ip.num_wr_ports; ifreeL.tdp_stats = ifreeL.stats_t; } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; + idcl.stats_t.readAc.access = coredynp.decodeW; + fdcl.stats_t.readAc.access = coredynp.decodeW; + idcl.tdp_stats = idcl.stats_t; + fdcl.tdp_stats = fdcl.stats_t; } else { if (coredynp.issueW > 1) { - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; + idcl.stats_t.readAc.access = coredynp.decodeW; + fdcl.stats_t.readAc.access = coredynp.decodeW; + idcl.tdp_stats = idcl.stats_t; + fdcl.tdp_stats = fdcl.stats_t; } } @@ -970,18 +970,18 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { ifreeL.stats_t.writeAc.access = 2 * (rename_writes + fp_rename_writes); ifreeL.rtp_stats = ifreeL.stats_t; } - idcl->stats_t.readAc.access = + idcl.stats_t.readAc.access = 3 * coredynp.decodeW * coredynp.decodeW * rename_reads; - fdcl->stats_t.readAc.access = + fdcl.stats_t.readAc.access = 
3 * coredynp.fp_issueW * coredynp.fp_issueW * fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; + idcl.rtp_stats = idcl.stats_t; + fdcl.rtp_stats = fdcl.stats_t; } else { if (coredynp.issueW > 1) { - idcl->stats_t.readAc.access = 2 * int_instructions; - fdcl->stats_t.readAc.access = fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; + idcl.stats_t.readAc.access = 2 * int_instructions; + fdcl.stats_t.readAc.access = fp_instructions; + idcl.rtp_stats = idcl.stats_t; + fdcl.rtp_stats = fdcl.stats_t; } } } @@ -995,13 +995,13 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { iFRAT.power_t.readOp.dynamic += (iFRAT.stats_t.readAc.access * (iFRAT.local_result.power.readOp.dynamic + - idcl->power.readOp.dynamic) + + idcl.power.readOp.dynamic) + iFRAT.stats_t.writeAc.access * iFRAT.local_result.power.writeOp.dynamic); fFRAT.power_t.readOp.dynamic += (fFRAT.stats_t.readAc.access * (fFRAT.local_result.power.readOp.dynamic + - fdcl->power.readOp.dynamic) + + fdcl.power.readOp.dynamic) + fFRAT.stats_t.writeAc.access * fFRAT.local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { @@ -1010,13 +1010,13 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { iFRAT.power_t.readOp.dynamic += (iFRAT.stats_t.readAc.access * (iFRAT.local_result.power.searchOp.dynamic + - idcl->power.readOp.dynamic) + + idcl.power.readOp.dynamic) + iFRAT.stats_t.writeAc.access * iFRAT.local_result.power.writeOp.dynamic); fFRAT.power_t.readOp.dynamic += (fFRAT.stats_t.readAc.access * (fFRAT.local_result.power.searchOp.dynamic + - fdcl->power.readOp.dynamic) + + fdcl.power.readOp.dynamic) + fFRAT.stats_t.writeAc.access * fFRAT.local_result.power.writeOp.dynamic); } @@ -1057,13 +1057,13 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { iFRAT.power_t.readOp.dynamic += (iFRAT.stats_t.readAc.access * (iFRAT.local_result.power.readOp.dynamic + - idcl->power.readOp.dynamic) + + idcl.power.readOp.dynamic) + 
iFRAT.stats_t.writeAc.access * iFRAT.local_result.power.writeOp.dynamic); fFRAT.power_t.readOp.dynamic += (fFRAT.stats_t.readAc.access * (fFRAT.local_result.power.readOp.dynamic + - fdcl->power.readOp.dynamic) + + fdcl.power.readOp.dynamic) + fFRAT.stats_t.writeAc.access * fFRAT.local_result.power.writeOp.dynamic); } else if (coredynp.rm_ty == CAMbased) { @@ -1072,13 +1072,13 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { iFRAT.power_t.readOp.dynamic += (iFRAT.stats_t.readAc.access * (iFRAT.local_result.power.searchOp.dynamic + - idcl->power.readOp.dynamic) + + idcl.power.readOp.dynamic) + iFRAT.stats_t.writeAc.access * iFRAT.local_result.power.writeOp.dynamic); fFRAT.power_t.readOp.dynamic += (fFRAT.stats_t.readAc.access * (fFRAT.local_result.power.searchOp.dynamic + - fdcl->power.readOp.dynamic) + + fdcl.power.readOp.dynamic) + fFRAT.stats_t.writeAc.access * fFRAT.local_result.power.writeOp.dynamic); } @@ -1109,20 +1109,20 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { } else { if (coredynp.issueW > 1) { - idcl->power_t.reset(); - fdcl->power_t.reset(); + idcl.power_t.reset(); + fdcl.power_t.reset(); set_pppm(pppm_t, - idcl->stats_t.readAc.access, + idcl.stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, - idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; + idcl.stats_t.readAc.access); + idcl.power_t = idcl.power * pppm_t; set_pppm(pppm_t, - fdcl->stats_t.readAc.access, + fdcl.stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, - idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; + idcl.stats_t.readAc.access); + fdcl.power_t = fdcl.power * pppm_t; } } @@ -1131,9 +1131,9 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; fFRAT.power = - fFRAT.power_t + 
(fFRAT.local_result.power) + fdcl->power_t; + fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; ffreeL.power = ffreeL.power_t + ffreeL.local_result.power; power = power + @@ -1147,9 +1147,9 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { } } else if (coredynp.scheu_ty == ReservationStation) { iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; fFRAT.power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; power = power + (iFRAT.power + fFRAT.power) + ifreeL.power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { @@ -1159,16 +1159,16 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { } } } else { - power = power + idcl->power_t + fdcl->power_t; + power = power + idcl.power_t + fdcl.power_t; } } else { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { iFRAT.rt_power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; fFRAT.rt_power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.rt_power = ifreeL.power_t + ifreeL.local_result.power; ffreeL.rt_power = ffreeL.power_t + ffreeL.local_result.power; @@ -1185,9 +1185,9 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { } } else if (coredynp.scheu_ty == ReservationStation) { iFRAT.rt_power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl->power_t; + iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; fFRAT.rt_power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl->power_t; + fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.rt_power = ifreeL.power_t + ifreeL.local_result.power; rt_power = rt_power + 
(iFRAT.rt_power + fFRAT.rt_power) + ifreeL.rt_power; @@ -1198,7 +1198,7 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { } } } else { - rt_power = rt_power + idcl->power_t + fdcl->power_t; + rt_power = rt_power + idcl.power_t + fdcl.power_t; } } } @@ -1362,44 +1362,44 @@ void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { } else { cout << indent_str << "Int DCL:" << endl; cout << indent_str_next - << "Peak Dynamic = " << idcl->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << idcl.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? idcl->power.readOp.longer_channel_leakage - : idcl->power.readOp.leakage) + << (long_channel ? idcl.power.readOp.longer_channel_leakage + : idcl.power.readOp.leakage) << " W" << endl; if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? idcl->power.readOp.power_gated_with_long_channel_leakage - : idcl->power.readOp.power_gated_leakage) + ? idcl.power.readOp.power_gated_with_long_channel_leakage + : idcl.power.readOp.power_gated_leakage) << " W" << endl; } cout << indent_str_next - << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << idcl.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << idcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + << idcl.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << indent_str << "FP DCL:" << endl; cout << indent_str_next - << "Peak Dynamic = " << fdcl->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << fdcl.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? fdcl->power.readOp.longer_channel_leakage - : fdcl->power.readOp.leakage) + << (long_channel ? 
fdcl.power.readOp.longer_channel_leakage + : fdcl.power.readOp.leakage) << " W" << endl; if (power_gating) { cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? fdcl->power.readOp.power_gated_with_long_channel_leakage - : fdcl->power.readOp.power_gated_leakage) + ? fdcl.power.readOp.power_gated_with_long_channel_leakage + : fdcl.power.readOp.power_gated_leakage) << " W" << endl; } cout << indent_str_next - << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << fdcl.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << fdcl->rt_power.readOp.dynamic / executionTime << " W" << endl; + << fdcl.rt_power.readOp.dynamic / executionTime << " W" << endl; } } else { if (coredynp.core_ty == OOO) { @@ -1445,18 +1445,18 @@ void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { } } else { cout << indent_str_next << "Int DCL Peak Dynamic = " - << idcl->rt_power.readOp.dynamic * clockRate << " W" << endl; + << idcl.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Int DCL Subthreshold Leakage = " - << idcl->rt_power.readOp.leakage << " W" << endl; + << idcl.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage + << "Int DCL Gate Leakage = " << idcl.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "FP DCL Peak Dynamic = " - << fdcl->rt_power.readOp.dynamic * clockRate << " W" << endl; + << fdcl.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "FP DCL Subthreshold Leakage = " - << fdcl->rt_power.readOp.leakage << " W" << endl; + << fdcl.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage + << "FP DCL Gate Leakage = " << fdcl.rt_power.readOp.gate_leakage << " W" << endl; } } @@ -1466,12 +1466,4 @@ RENAMINGU 
::~RENAMINGU() { if (!exist) { return; } - if (idcl) { - delete idcl; - idcl = 0; - } - if (fdcl) { - delete fdcl; - fdcl = 0; - } } diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index 54c048a..22d88c3 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -52,8 +52,8 @@ class RENAMINGU : public Component { ArrayST fRRAT; ArrayST ifreeL; ArrayST ffreeL; - dep_resource_conflict_check *idcl; - dep_resource_conflict_check *fdcl; + dep_resource_conflict_check idcl; + dep_resource_conflict_check fdcl; bool exist; RENAMINGU(); diff --git a/src/core/scheduler.cc b/src/core/scheduler.cc index ddb6f11..12a8dc8 100644 --- a/src/core/scheduler.cc +++ b/src/core/scheduler.cc @@ -120,7 +120,7 @@ void SchedulerU::set_params(const ParseXML *XML_interface, */ interface_ip.assoc = 1; // reset to prevent unnecessary warning messages when init_interface - instruction_selection = new selection_logic( + instruction_selection.set_params( is_default, XML->sys.core[ithCore].instruction_window_size, coredynp.peak_issueW * XML->sys.core[ithCore].number_hardware_threads, @@ -370,8 +370,7 @@ void SchedulerU::set_params(const ParseXML *XML_interface, coredynp.opt_local, coredynp.core_ty); } - instruction_selection = - new selection_logic(is_default, + instruction_selection.set_params(is_default, XML->sys.core[ithCore].instruction_window_size, coredynp.peak_issueW, &interface_ip, @@ -418,6 +417,7 @@ void SchedulerU::computeArea() { coredynp.num_pipelines); area.set_area(area.get_area() + int_inst_window.local_result.area * coredynp.num_pipelines); + } if (coredynp.core_ty == OOO) { @@ -728,7 +728,7 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { int_inst_window.local_result.power.writeOp.dynamic * int_inst_window.stats_t.writeAc.access + int_inst_window.stats_t.readAc.access * - instruction_selection->power.readOp.dynamic; + instruction_selection.power.readOp.dynamic; fp_inst_window.power_t.readOp.dynamic += 
fp_inst_window.local_result.power.readOp.dynamic * @@ -738,7 +738,7 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { fp_inst_window.local_result.power.writeOp.dynamic * fp_inst_window.stats_t.writeAc.access + fp_inst_window.stats_t.writeAc.access * - instruction_selection->power.readOp.dynamic; + instruction_selection.power.readOp.dynamic; if (XML->sys.core[ithCore].ROB_size > 0) { ROB.power_t.reset(); @@ -757,7 +757,7 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { int_inst_window.local_result.power.writeOp.dynamic * int_inst_window.stats_t.writeAc.access + int_inst_window.stats_t.writeAc.access * - instruction_selection->power.readOp.dynamic; + instruction_selection.power.readOp.dynamic; } // assign values @@ -765,11 +765,11 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { if (coredynp.core_ty == OOO) { int_inst_window.power = int_inst_window.power_t + - (int_inst_window.local_result.power + instruction_selection->power) * + (int_inst_window.local_result.power + instruction_selection.power) * pppm_lkg; fp_inst_window.power = fp_inst_window.power_t + - (fp_inst_window.local_result.power + instruction_selection->power) * + (fp_inst_window.local_result.power + instruction_selection.power) * pppm_lkg; power = power + int_inst_window.power + fp_inst_window.power; if (XML->sys.core[ithCore].ROB_size > 0) { @@ -782,7 +782,7 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { // XML->sys.core[ithCore].issue_width,1, 1, 1); int_inst_window.power = int_inst_window.power_t + - (int_inst_window.local_result.power + instruction_selection->power) * + (int_inst_window.local_result.power + instruction_selection.power) * pppm_lkg; power = power + int_inst_window.power; } @@ -791,11 +791,11 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { if (coredynp.core_ty == OOO) { int_inst_window.rt_power = int_inst_window.power_t + - (int_inst_window.local_result.power + instruction_selection->power) * + (int_inst_window.local_result.power + 
instruction_selection.power) * pppm_lkg; fp_inst_window.rt_power = fp_inst_window.power_t + - (fp_inst_window.local_result.power + instruction_selection->power) * + (fp_inst_window.local_result.power + instruction_selection.power) * pppm_lkg; rt_power = rt_power + int_inst_window.rt_power + fp_inst_window.rt_power; if (XML->sys.core[ithCore].ROB_size > 0) { @@ -808,7 +808,7 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { // XML->sys.core[ithCore].issue_width,1, 1, 1); int_inst_window.rt_power = int_inst_window.power_t + - (int_inst_window.local_result.power + instruction_selection->power) * + (int_inst_window.local_result.power + instruction_selection.power) * pppm_lkg; rt_power = rt_power + int_inst_window.rt_power; } @@ -820,15 +820,9 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { // int_inst_window.stats_t.readAc.access + // + int_inst_window.local_result.power.writeOp.dynamic * // int_inst_window.stats_t.writeAc.access<<"leakage="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<opcode comparator + else + compare_bits += 16 + 8 + 8; + + conflict_check_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + } + void dep_resource_conflict_check::conflict_check_power() { double Ctotal; int num_comparators; diff --git a/src/logic/dep_resource_conflict_check.h b/src/logic/dep_resource_conflict_check.h index f689b5c..7a12dd6 100644 --- a/src/logic/dep_resource_conflict_check.h +++ b/src/logic/dep_resource_conflict_check.h @@ -49,6 +49,8 @@ class dep_resource_conflict_check : public Component { public: + dep_resource_conflict_check(){}; + dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam &dyn_p_, int compare_bits_, @@ -64,6 +66,11 @@ class dep_resource_conflict_check : public Component { statsDef stats_t; powerDef power_t; + void set_params(const InputParameter *configure_interface, + const CoreDynParam 
&dyn_p_, + int compare_bits_, + bool _is_default = true); + void conflict_check_power(); double compare_cap(); ~dep_resource_conflict_check() { local_result.cleanup(); } diff --git a/src/logic/selection_logic.cc b/src/logic/selection_logic.cc index 2e62cf3..33830e4 100644 --- a/src/logic/selection_logic.cc +++ b/src/logic/selection_logic.cc @@ -63,6 +63,37 @@ selection_logic::selection_logic(bool _is_default, power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; } +void selection_logic::set_params(bool _is_default, int win_entries_, int issue_width_, const InputParameter *configure_interface, enum Device_ty device_ty_ , enum Core_type core_ty_ ) +{ + is_default = _is_default; + win_entries = win_entries_; + issue_width = issue_width_; + device_ty = device_ty_; + core_ty = core_ty_; + + l_ip = *configure_interface; + local_result = init_interface(&l_ip); + // init_tech_params(l_ip.F_sz_um, false); + // win_entries=numIBEntries;//IQentries; + // issue_width=issueWidth; + + selection_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + double pg_reduction = power_gating_leakage_reduction(false); + power.readOp.power_gated_leakage = power.readOp.leakage * pg_reduction; + power.readOp.power_gated_with_long_channel_leakage = + power.readOp.power_gated_leakage * long_channel_device_reduction; +} + void selection_logic::selection_power() { // based on cost effective superscalar // processor TR pp27-31 diff --git a/src/logic/selection_logic.h b/src/logic/selection_logic.h index 0051335..4bb4c1e 100644 --- a/src/logic/selection_logic.h +++ b/src/logic/selection_logic.h @@ -49,6 +49,7 @@ class selection_logic : 
public Component { public: +selection_logic(){}; selection_logic( bool _is_default, int win_entries_, @@ -65,7 +66,13 @@ class selection_logic : public Component { int num_threads; enum Device_ty device_ty; enum Core_type core_ty; - + + void set_params(bool _is_default, + int win_entries_, + int issue_width_, + const InputParameter *configure_interface, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); void selection_power(); void leakage_feedback(double temperature); // TODO }; From 28a9a6daa93a98a1d05e3fbd2b865c1f2676d4a4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Sun, 21 Jun 2020 21:42:00 -0500 Subject: [PATCH 37/59] Added stats comment --- src/core/core.cc | 2 +- src/core/exec_unit.cc | 378 ------------------------------------------ 2 files changed, 1 insertion(+), 379 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index 2a4115b..1d042b5 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -79,8 +79,8 @@ Core::Core(const ParseXML *XML_interface, mmu->set_stats(XML); exu.set_params( XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); - exu.set_stats(XML); exu.computeArea(); + exu.set_stats(XML); exu.computeStaticPower(); undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); if (coredynp.core_ty == OOO) { diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index e326385..547d77e 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -105,10 +105,6 @@ void EXECU::set_stats(const ParseXML *XML){ init_stats = true; } -// void EXECU::computeArea(){ - -// } - void EXECU::computeArea(){ if (!init_params) { std::cerr << "[ EXECU ] Error: must set params before calling " @@ -686,377 +682,3 @@ void EXECU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << bypass.rt_power.readOp.gate_leakage << " W" << endl; } } - -EXECU ::~EXECU() { - if (!exist) { - return; - } -} - -// void EXECU::set_params(const ParseXML *XML_interface, -// int ithCore_, 
-// InputParameter *interface_ip_, -// double lsq_height_, -// const CoreDynParam &dyn_p_, -// bool exist_){ - -// XML = XML_interface; -// ithCore = ithCore_; -// interface_ip = *interface_ip_; -// coredynp = dyn_p_; -// lsq_height = lsq_height_; -// exist = exist_; - -// bool exist_flag = true; -// if (!exist) { -// return; -// } -// double fu_height = 0.0; -// clockRate = coredynp.clockRate; -// executionTime = coredynp.executionTime; -// rfu.set_params(XML, ithCore, &interface_ip, coredynp); -// rfu.computeArea(); -// rfu.set_stats(XML); -// scheu.set_params(XML, ithCore, &interface_ip, coredynp); -// scheu.computeArea(); -// scheu.set_stats(XML); -// exeu.set_params(XML, ithCore, &interface_ip, coredynp, ALU); -// exeu.set_stats(XML); -// exeu.computeArea(); -// area.set_area(area.get_area() + exeu.area.get_area() + rfu.area.get_area() + -// scheu.area.get_area()); -// fu_height = exeu.FU_height; -// if (coredynp.num_fpus > 0) { -// fp_u.set_params(XML, ithCore, &interface_ip, coredynp, FPU); -// fp_u.set_stats(XML); -// fp_u.computeArea(); -// area.set_area(area.get_area() + fp_u.area.get_area()); -// } -// if (coredynp.num_muls > 0) { -// mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); -// mul.set_stats(XML); -// mul.computeArea(); -// area.set_area(area.get_area() + mul.area.get_area()); -// fu_height += mul.FU_height; -// } -// /* -// * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; -// * fp_tag-broadcast integer by pass has two paths and fp has 3 paths. 
on the -// * same bus there are multiple tri-state drivers and muxes that go to -// * different components on the same bus -// */ -// if (XML->sys.Embedded) { -// interface_ip.wt = Global_30; -// interface_ip.wire_is_mat_type = 0; -// interface_ip.wire_os_mat_type = 0; -// interface_ip.throughput = 1.0 / clockRate; -// interface_ip.latency = 1.0 / clockRate; -// } else { -// interface_ip.wt = Global; -// interface_ip.wire_is_mat_type = -// 2; // start from semi-global since local wires are already used -// interface_ip.wire_os_mat_type = 2; -// interface_ip.throughput = 10.0 / clockRate; // Do not care -// interface_ip.latency = 10.0 / clockRate; -// } - -// if (coredynp.core_ty == Inorder) { -// int_bypass.init("Int Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(XML->sys.machine_bits / 32.0) * 32), -// rfu.int_regfile_height + exeu.FU_height + lsq_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); -// intTagBypass.init("Int Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.perThreadState, -// rfu.int_regfile_height + exeu.FU_height + lsq_height + -// scheu.Iw_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + intTagBypass.area.get_area()); - -// if (coredynp.num_muls > 0) { -// int_mul_bypass.init("Mul Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), -// rfu.fp_regfile_height + exeu.FU_height + -// mul.FU_height + lsq_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// int_mul_bypass.area.get_area()); -// intTag_mul_Bypass.init("Mul Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.perThreadState, -// rfu.fp_regfile_height + exeu.FU_height + -// mul.FU_height + 
lsq_height + scheu.Iw_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// intTag_mul_Bypass.area.get_area()); -// } - -// if (coredynp.num_fpus > 0) { -// fp_bypass.init("FP Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(XML->sys.machine_bits / 32.0) * 32 * 1.5), -// rfu.fp_regfile_height + fp_u.FU_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + fp_bypass.area.get_area()); -// fpTagBypass.init("FP Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.perThreadState, -// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + -// scheu.Iw_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// fpTagBypass.area.get_area()); -// } -// } else { // OOO -// if (coredynp.scheu_ty == PhysicalRegFile) { -// /* For physical register based OOO, -// * data broadcast interconnects cover across functional units, lsq, inst -// * windows and register files, while tag broadcast interconnects also -// * cover across ROB -// */ -// int_bypass.init("Int Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.int_data_width)), -// rfu.int_regfile_height + exeu.FU_height + lsq_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); -// intTagBypass.init("Int Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_ireg_width, -// rfu.int_regfile_height + exeu.FU_height + lsq_height + -// scheu.Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// intTagBypass.area.get_area()); - -// if (coredynp.num_muls > 0) { -// 
int_mul_bypass.init("Mul Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.int_data_width)), -// rfu.int_regfile_height + exeu.FU_height + -// mul.FU_height + lsq_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// intTag_mul_Bypass.init("Mul Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_ireg_width, -// rfu.int_regfile_height + exeu.FU_height + -// mul.FU_height + lsq_height + -// scheu.Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// int_mul_bypass.area.get_area()); -// bypass.area.set_area(bypass.area.get_area() + -// intTag_mul_Bypass.area.get_area()); -// } - -// if (coredynp.num_fpus > 0) { -// fp_bypass.init("FP Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.fp_data_width)), -// rfu.fp_regfile_height + fp_u.FU_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// fpTagBypass.init("FP Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_freg_width, -// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + -// scheu.fp_Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// fp_bypass.area.get_area()); -// bypass.area.set_area(bypass.area.get_area() + -// fpTagBypass.area.get_area()); -// } -// } else { -// /* -// * In RS based processor both data and tag are broadcast together, -// * covering functional units, lsq, nst windows, register files, and ROBs -// */ -// int_bypass.init("Int Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.int_data_width)), -// rfu.int_regfile_height + exeu.FU_height + lsq_height + -// scheu.Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); 
-// intTagBypass.init("Int Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_ireg_width, -// rfu.int_regfile_height + exeu.FU_height + lsq_height + -// scheu.Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + int_bypass.area.get_area()); -// bypass.area.set_area(bypass.area.get_area() + -// intTagBypass.area.get_area()); -// if (coredynp.num_muls > 0) { -// int_mul_bypass.init("Mul Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.int_data_width)), -// rfu.int_regfile_height + exeu.FU_height + -// mul.FU_height + lsq_height + scheu.Iw_height + -// scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// intTag_mul_Bypass.init("Mul Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_ireg_width, -// rfu.int_regfile_height + exeu.FU_height + -// mul.FU_height + lsq_height + -// scheu.Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// int_mul_bypass.area.get_area()); -// bypass.area.set_area(bypass.area.get_area() + -// intTag_mul_Bypass.area.get_area()); -// } - -// if (coredynp.num_fpus > 0) { -// fp_bypass.init("FP Bypass Data", -// Core_device, -// 1, -// 1, -// int(ceil(coredynp.fp_data_width)), -// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + -// scheu.fp_Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// fpTagBypass.init("FP Bypass tag", -// Core_device, -// 1, -// 1, -// coredynp.phy_freg_width, -// rfu.fp_regfile_height + fp_u.FU_height + lsq_height + -// scheu.fp_Iw_height + scheu.ROB_height, -// &interface_ip, -// 3, -// false, -// 1.0, -// coredynp.opt_local, -// coredynp.core_ty); -// bypass.area.set_area(bypass.area.get_area() + -// 
fp_bypass.area.get_area()); -// bypass.area.set_area(bypass.area.get_area() + -// fpTagBypass.area.get_area()); -// } -// } -// } -// area.set_area(area.get_area() + bypass.area.get_area()); - -// init_params = true; -// } \ No newline at end of file From 2797d31964984854b0e443c9de5ad8a1571826cf Mon Sep 17 00:00:00 2001 From: Andrew Date: Mon, 22 Jun 2020 00:46:15 -0500 Subject: [PATCH 38/59] Fix one bug another 10 appear.... --- src/cache/datacache.cc | 10 +- src/cache/datacache.h | 17 +- src/cache/instcache.cc | 26 +- src/cache/instcache.h | 26 +- src/cache/sharedcache.cc | 425 +++++++++++++------------- src/core/instfetch.cc | 223 +++++++------- src/core/loadstore.cc | 324 ++++++++++---------- src/iocontrollers/flash_controller.cc | 7 +- src/main.cc | 10 +- src/memoryctrl/memoryctrl.h | 1 + src/noc.cc | 14 +- src/noc.h | 6 +- src/processor.h | 2 +- 13 files changed, 560 insertions(+), 531 deletions(-) diff --git a/src/cache/datacache.cc b/src/cache/datacache.cc index 76194fe..110ad88 100644 --- a/src/cache/datacache.cc +++ b/src/cache/datacache.cc @@ -3,12 +3,6 @@ #include #include -DataCache::DataCache() { wbb = nullptr; }; +DataCache::DataCache(){}; -DataCache::~DataCache() { - if (wbb) { - // wbb->local_result.cleanup(); - delete wbb; - wbb = 0; - } -}; +DataCache::~DataCache(){}; diff --git a/src/cache/datacache.h b/src/cache/datacache.h index 035e59f..fb47270 100644 --- a/src/cache/datacache.h +++ b/src/cache/datacache.h @@ -40,14 +40,29 @@ #include "instcache.h" #include "parameter.h" +#include +#include +#include +#include +#include #include #include +#include class DataCache : public InstCache { public: - ArrayST *wbb; + ArrayST wbb; DataCache(); ~DataCache(); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &wbb; + Component::serialize(ar, version); + } }; #endif // __DATACACHE_H__ diff --git a/src/cache/instcache.cc b/src/cache/instcache.cc index 
ac1acbd..3485a21 100644 --- a/src/cache/instcache.cc +++ b/src/cache/instcache.cc @@ -3,28 +3,6 @@ #include #include -InstCache::InstCache() { - caches = nullptr; - missb = nullptr; - ifb = nullptr; - prefetchb = nullptr; -}; +InstCache::InstCache(){}; -InstCache::~InstCache() { - if (caches) { // caches->local_result.cleanup(); - delete caches; - caches = 0; - } - if (missb) { // missb->local_result.cleanup(); - delete missb; - missb = 0; - } - if (ifb) { // ifb->local_result.cleanup(); - delete ifb; - ifb = 0; - } - if (prefetchb) { // prefetchb->local_result.cleanup(); - delete prefetchb; - prefetchb = 0; - } -}; +InstCache::~InstCache(){}; diff --git a/src/cache/instcache.h b/src/cache/instcache.h index 304e6bd..44e3fd5 100644 --- a/src/cache/instcache.h +++ b/src/cache/instcache.h @@ -39,18 +39,36 @@ #include "const.h" #include "parameter.h" +#include +#include +#include +#include +#include #include #include +#include class InstCache : public Component { public: - ArrayST *caches; - ArrayST *missb; - ArrayST *ifb; - ArrayST *prefetchb; + ArrayST caches; + ArrayST missb; + ArrayST ifb; + ArrayST prefetchb; powerDef power_t; // temp value holder for both (max) power and runtime power InstCache(); ~InstCache(); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &caches; + ar &missb; + ar &ifb; + ar &prefetchb; + Component::serialize(ar, version); + } }; #endif // __INSTCACHE_H__ diff --git a/src/cache/sharedcache.cc b/src/cache/sharedcache.cc index 7449489..d6b6517 100644 --- a/src/cache/sharedcache.cc +++ b/src/cache/sharedcache.cc @@ -278,18 +278,14 @@ void SharedCache::set_params(const ParseXML *XML, // interface_ip.ndcm =1 ; // interface_ip.ndsam1 =1; // interface_ip.ndsam2 =1; - unicache.caches = - new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area() + - unicache.caches->local_result.area); - 
area.set_area(area.get_area() + unicache.caches->local_result.area); - interface_ip.force_cache_config = false; + unicache.caches.set_params( + &interface_ip, cachep.name + "cache", device_t, true, core_t); if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + - unicache.caches->l_ip.line_sz; + unicache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = @@ -313,14 +309,12 @@ void SharedCache::set_params(const ParseXML *XML, interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = 1; - unicache.missb = new ArrayST( + unicache.missb.set_params( &interface_ip, cachep.name + "MissB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area() + - unicache.missb->local_result.area); - area.set_area(area.get_area() + unicache.missb->local_result.area); + // fill buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; + data = unicache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); @@ -339,16 +333,14 @@ void SharedCache::set_params(const ParseXML *XML, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - unicache.ifb = new ArrayST( + unicache.ifb.set_params( &interface_ip, cachep.name + "FillB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area() + - unicache.ifb->local_result.area); - area.set_area(area.get_area() + unicache.ifb->local_result.area); + // prefetch buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; // check with previous entries to decide wthether to // merge. 
- data = unicache.caches->l_ip + data = unicache.caches.l_ip .line_sz; // separate queue to prevent from cache polution. interface_ip.specific_tag = 1; interface_ip.tag_w = tag; @@ -368,14 +360,12 @@ void SharedCache::set_params(const ParseXML *XML, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - unicache.prefetchb = new ArrayST( + unicache.prefetchb.set_params( &interface_ip, cachep.name + "PrefetchB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area() + - unicache.prefetchb->local_result.area); - area.set_area(area.get_area() + unicache.prefetchb->local_result.area); + // WBB tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; + data = unicache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; @@ -394,11 +384,8 @@ void SharedCache::set_params(const ParseXML *XML, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - unicache.wbb = - new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area() + - unicache.wbb->local_result.area); - area.set_area(area.get_area() + unicache.wbb->local_result.area); + unicache.wbb.set_params( + &interface_ip, cachep.name + "WBB", device_t, true, core_t); } init_params = true; } @@ -414,6 +401,38 @@ void SharedCache::computeArea() { "computeArea()\n"; exit(1); } + + unicache.caches.computeArea(); + unicache.area.set_area(unicache.area.get_area() + + unicache.caches.local_result.area); + area.set_area(area.get_area() + unicache.caches.local_result.area); + interface_ip.force_cache_config = false; + + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || + (cachep.dir_ty == ST && cacheL == L2Directory))) { + unicache.missb.computeArea(); + unicache.area.set_area(unicache.area.get_area() + + unicache.missb.local_result.area); + area.set_area(area.get_area() + 
unicache.missb.local_result.area); + + // Fill Buffer: + unicache.ifb.computeArea(); + unicache.area.set_area(unicache.area.get_area() + + unicache.ifb.local_result.area); + area.set_area(area.get_area() + unicache.ifb.local_result.area); + + // Prefetch Buffer: + unicache.prefetchb.computeArea(); + unicache.area.set_area(unicache.area.get_area() + + unicache.prefetchb.local_result.area); + area.set_area(area.get_area() + unicache.prefetchb.local_result.area); + + // WBB: + unicache.wbb.computeArea(); + unicache.area.set_area(unicache.area.get_area() + + unicache.wbb.local_result.area); + area.set_area(area.get_area() + unicache.wbb.local_result.area); + } set_area = true; } @@ -433,31 +452,31 @@ void SharedCache::computeStaticPower(bool is_tdp) { if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { // init stats for Peak - unicache.caches->stats_t.readAc.access = - .67 * unicache.caches->l_ip.num_rw_ports * cachep.duty_cycle * + unicache.caches.stats_t.readAc.access = + .67 * unicache.caches.l_ip.num_rw_ports * cachep.duty_cycle * homenode_data_access; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = - .33 * unicache.caches->l_ip.num_rw_ports * cachep.duty_cycle * + unicache.caches.stats_t.readAc.miss = 0; + unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = + .33 * unicache.caches.l_ip.num_rw_ports * cachep.duty_cycle * homenode_data_access; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.writeAc.miss = 0; + 
unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + unicache.caches.stats_t.writeAc.miss; + unicache.caches.tdp_stats = unicache.caches.stats_t; if (cachep.dir_ty == SBT) { homenode_stats_t.readAc.access = - .67 * unicache.caches->l_ip.num_rw_ports * cachep.dir_duty_cycle * + .67 * unicache.caches.l_ip.num_rw_ports * cachep.dir_duty_cycle * (1 - homenode_data_access); homenode_stats_t.readAc.miss = 0; homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; homenode_stats_t.writeAc.access = - .67 * unicache.caches->l_ip.num_rw_ports * cachep.dir_duty_cycle * + .67 * unicache.caches.l_ip.num_rw_ports * cachep.dir_duty_cycle * (1 - homenode_data_access); homenode_stats_t.writeAc.miss = 0; homenode_stats_t.writeAc.hit = @@ -465,61 +484,60 @@ void SharedCache::computeStaticPower(bool is_tdp) { homenode_tdp_stats = homenode_stats_t; } - unicache.missb->stats_t.readAc.access = - unicache.missb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.missb->stats_t.writeAc.access = - unicache.missb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.missb->tdp_stats = unicache.missb->stats_t; + unicache.missb.stats_t.readAc.access = + unicache.missb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.missb.stats_t.writeAc.access = + unicache.missb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.missb.tdp_stats = unicache.missb.stats_t; - unicache.ifb->stats_t.readAc.access = - unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.ifb->stats_t.writeAc.access = - unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.ifb->tdp_stats = unicache.ifb->stats_t; + unicache.ifb.stats_t.readAc.access = + unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.ifb.stats_t.writeAc.access = + unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.ifb.tdp_stats = unicache.ifb.stats_t; - unicache.prefetchb->stats_t.readAc.access = - 
unicache.prefetchb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.prefetchb->stats_t.writeAc.access = - unicache.ifb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t; + unicache.prefetchb.stats_t.readAc.access = + unicache.prefetchb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.prefetchb.stats_t.writeAc.access = + unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.prefetchb.tdp_stats = unicache.prefetchb.stats_t; - unicache.wbb->stats_t.readAc.access = - unicache.wbb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.wbb->stats_t.writeAc.access = - unicache.wbb->l_ip.num_search_ports * cachep.duty_cycle; - unicache.wbb->tdp_stats = unicache.wbb->stats_t; + unicache.wbb.stats_t.readAc.access = + unicache.wbb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.wbb.stats_t.writeAc.access = + unicache.wbb.l_ip.num_search_ports * cachep.duty_cycle; + unicache.wbb.tdp_stats = unicache.wbb.stats_t; } else { - unicache.caches->stats_t.readAc.access = - unicache.caches->l_ip.num_search_ports * cachep.duty_cycle; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = 0; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.readAc.access = + unicache.caches.l_ip.num_search_ports * cachep.duty_cycle; + unicache.caches.stats_t.readAc.miss = 0; + unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = 0; + unicache.caches.stats_t.writeAc.miss = 0; + unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + 
unicache.caches.stats_t.writeAc.miss; + unicache.caches.tdp_stats = unicache.caches.stats_t; } } else { // init stats for runtime power (RTP) if (cacheL == L2) { - unicache.caches->stats_t.readAc.access = + unicache.caches.stats_t.readAc.access = XML->sys.L2[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = + unicache.caches.stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; + unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = XML->sys.L2[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = - XML->sys.L2[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.writeAc.miss = XML->sys.L2[ithCache].write_misses; + unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + unicache.caches.stats_t.writeAc.miss; + unicache.caches.rtp_stats = unicache.caches.stats_t; if (cachep.dir_ty == SBT) { homenode_rtp_stats.readAc.access = @@ -536,20 +554,19 @@ void SharedCache::computeStaticPower(bool is_tdp) { homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; } } else if (cacheL == L3) { - unicache.caches->stats_t.readAc.access = + unicache.caches.stats_t.readAc.access = XML->sys.L3[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = + unicache.caches.stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; + 
unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = XML->sys.L3[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = - XML->sys.L3[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.writeAc.miss = XML->sys.L3[ithCache].write_misses; + unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + unicache.caches.stats_t.writeAc.miss; + unicache.caches.rtp_stats = unicache.caches.stats_t; if (cachep.dir_ty == SBT) { homenode_rtp_stats.readAc.access = @@ -566,92 +583,88 @@ void SharedCache::computeStaticPower(bool is_tdp) { homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; } } else if (cacheL == L1Directory) { - unicache.caches->stats_t.readAc.access = + unicache.caches.stats_t.readAc.access = XML->sys.L1Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = + unicache.caches.stats_t.readAc.miss = XML->sys.L1Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = + unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = XML->sys.L1Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = + unicache.caches.stats_t.writeAc.miss = XML->sys.L1Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + 
unicache.caches.stats_t.writeAc.miss; + unicache.caches.rtp_stats = unicache.caches.stats_t; } else if (cacheL == L2Directory) { - unicache.caches->stats_t.readAc.access = + unicache.caches.stats_t.readAc.access = XML->sys.L2Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = + unicache.caches.stats_t.readAc.miss = XML->sys.L2Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = - unicache.caches->stats_t.readAc.access - - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = + unicache.caches.stats_t.readAc.hit = + unicache.caches.stats_t.readAc.access - + unicache.caches.stats_t.readAc.miss; + unicache.caches.stats_t.writeAc.access = XML->sys.L2Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = + unicache.caches.stats_t.writeAc.miss = XML->sys.L2Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = - unicache.caches->stats_t.writeAc.access - - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; + unicache.caches.stats_t.writeAc.hit = + unicache.caches.stats_t.writeAc.access - + unicache.caches.stats_t.writeAc.miss; + unicache.caches.rtp_stats = unicache.caches.stats_t; } if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { // Assuming write back and write-allocate cache - unicache.missb->stats_t.readAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.missb->stats_t.writeAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; + unicache.missb.stats_t.readAc.access = + unicache.caches.stats_t.writeAc.miss; + unicache.missb.stats_t.writeAc.access = + unicache.caches.stats_t.writeAc.miss; + unicache.missb.rtp_stats = unicache.missb.stats_t; - unicache.ifb->stats_t.readAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.ifb->stats_t.writeAc.access = - 
unicache.caches->stats_t.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; + unicache.ifb.stats_t.readAc.access = unicache.caches.stats_t.writeAc.miss; + unicache.ifb.stats_t.writeAc.access = + unicache.caches.stats_t.writeAc.miss; + unicache.ifb.rtp_stats = unicache.ifb.stats_t; - unicache.prefetchb->stats_t.readAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; + unicache.prefetchb.stats_t.readAc.access = + unicache.caches.stats_t.writeAc.miss; + unicache.prefetchb.stats_t.writeAc.access = + unicache.caches.stats_t.writeAc.miss; + unicache.prefetchb.rtp_stats = unicache.prefetchb.stats_t; - unicache.wbb->stats_t.readAc.access = - unicache.caches->stats_t.writeAc.miss; - unicache.wbb->stats_t.writeAc.access = - unicache.caches->stats_t.writeAc.miss; + unicache.wbb.stats_t.readAc.access = unicache.caches.stats_t.writeAc.miss; + unicache.wbb.stats_t.writeAc.access = + unicache.caches.stats_t.writeAc.miss; if (cachep.dir_ty == SBT) { - unicache.missb->stats_t.readAc.access += - homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += + unicache.missb.stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.missb.stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; + unicache.missb.rtp_stats = unicache.missb.stats_t; - unicache.missb->stats_t.readAc.access += - homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += + unicache.missb.stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.missb.stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; + unicache.missb.rtp_stats = unicache.missb.stats_t; - unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->stats_t.writeAc.access += 
homenode_rtp_stats.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; + unicache.ifb.stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.ifb.stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; + unicache.ifb.rtp_stats = unicache.ifb.stats_t; - unicache.prefetchb->stats_t.readAc.access += + unicache.prefetchb.stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access += + unicache.prefetchb.stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; + unicache.prefetchb.rtp_stats = unicache.prefetchb.stats_t; - unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; + unicache.wbb.stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; + unicache.wbb.stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; } - unicache.wbb->rtp_stats = unicache.wbb->stats_t; + unicache.wbb.rtp_stats = unicache.wbb.stats_t; } } @@ -659,93 +672,91 @@ void SharedCache::computeStaticPower(bool is_tdp) { if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { unicache.power_t.readOp.dynamic += - (unicache.caches->stats_t.readAc.hit * - unicache.caches->local_result.power.readOp.dynamic + - unicache.caches->stats_t.readAc.miss * - unicache.caches->local_result.tag_array2->power.readOp.dynamic + - unicache.caches->stats_t.writeAc.miss * - unicache.caches->local_result.tag_array2->power.writeOp.dynamic + - unicache.caches->stats_t.writeAc.access * - unicache.caches->local_result.power.writeOp + (unicache.caches.stats_t.readAc.hit * + unicache.caches.local_result.power.readOp.dynamic + + unicache.caches.stats_t.readAc.miss * + unicache.caches.local_result.tag_array2->power.readOp.dynamic + + unicache.caches.stats_t.writeAc.miss * + unicache.caches.local_result.tag_array2->power.writeOp.dynamic + + 
unicache.caches.stats_t.writeAc.access * + unicache.caches.local_result.power.writeOp .dynamic); // write miss will also generate a write later if (cachep.dir_ty == SBT) { unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * - (unicache.caches->local_result.data_array2->power.readOp.dynamic * + (unicache.caches.local_result.data_array2->power.readOp.dynamic * dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic) + + unicache.caches.local_result.tag_array2->power.readOp.dynamic) + homenode_stats_t.readAc.miss * - unicache.caches->local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.tag_array2->power.readOp.dynamic + homenode_stats_t.writeAc.miss * - unicache.caches->local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.tag_array2->power.readOp.dynamic + homenode_stats_t.writeAc.hit * - (unicache.caches->local_result.data_array2->power.writeOp - .dynamic * + (unicache.caches.local_result.data_array2->power.writeOp.dynamic * dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.tag_array2->power.readOp.dynamic + homenode_stats_t.writeAc.miss * - unicache.caches->local_result.power.writeOp + unicache.caches.local_result.power.writeOp .dynamic); // write miss on dynamic home node will // generate a replacement write on whole cache // block } unicache.power_t.readOp.dynamic += - unicache.missb->stats_t.readAc.access * - unicache.missb->local_result.power.searchOp.dynamic + - unicache.missb->stats_t.writeAc.access * - unicache.missb->local_result.power.writeOp + unicache.missb.stats_t.readAc.access * + unicache.missb.local_result.power.searchOp.dynamic + + unicache.missb.stats_t.writeAc.access * + unicache.missb.local_result.power.writeOp .dynamic; // each access to missb involves a CAM and a write unicache.power_t.readOp.dynamic += - unicache.ifb->stats_t.readAc.access * - unicache.ifb->local_result.power.searchOp.dynamic + 
- unicache.ifb->stats_t.writeAc.access * - unicache.ifb->local_result.power.writeOp.dynamic; + unicache.ifb.stats_t.readAc.access * + unicache.ifb.local_result.power.searchOp.dynamic + + unicache.ifb.stats_t.writeAc.access * + unicache.ifb.local_result.power.writeOp.dynamic; unicache.power_t.readOp.dynamic += - unicache.prefetchb->stats_t.readAc.access * - unicache.prefetchb->local_result.power.searchOp.dynamic + - unicache.prefetchb->stats_t.writeAc.access * - unicache.prefetchb->local_result.power.writeOp.dynamic; + unicache.prefetchb.stats_t.readAc.access * + unicache.prefetchb.local_result.power.searchOp.dynamic + + unicache.prefetchb.stats_t.writeAc.access * + unicache.prefetchb.local_result.power.writeOp.dynamic; unicache.power_t.readOp.dynamic += - unicache.wbb->stats_t.readAc.access * - unicache.wbb->local_result.power.searchOp.dynamic + - unicache.wbb->stats_t.writeAc.access * - unicache.wbb->local_result.power.writeOp.dynamic; + unicache.wbb.stats_t.readAc.access * + unicache.wbb.local_result.power.searchOp.dynamic + + unicache.wbb.stats_t.writeAc.access * + unicache.wbb.local_result.power.writeOp.dynamic; } else { unicache.power_t.readOp.dynamic += - (unicache.caches->stats_t.readAc.access * - unicache.caches->local_result.power.searchOp.dynamic + - unicache.caches->stats_t.writeAc.access * - unicache.caches->local_result.power.writeOp.dynamic); + (unicache.caches.stats_t.readAc.access * + unicache.caches.local_result.power.searchOp.dynamic + + unicache.caches.stats_t.writeAc.access * + unicache.caches.local_result.power.writeOp.dynamic); } if (is_tdp) { unicache.power = - unicache.power_t + (unicache.caches->local_result.power) * pppm_lkg; + unicache.power_t + (unicache.caches.local_result.power) * pppm_lkg; if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { - unicache.power = - unicache.power + (unicache.missb->local_result.power + - unicache.ifb->local_result.power + - 
unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power) * - pppm_lkg; + unicache.power = unicache.power + (unicache.missb.local_result.power + + unicache.ifb.local_result.power + + unicache.prefetchb.local_result.power + + unicache.wbb.local_result.power) * + pppm_lkg; } power = power + unicache.power; - // cout<<"unicache.caches->local_result.power.readOp.dynamic"<local_result.power.readOp.dynamic<local_result.power.writeOp.dynamic"<local_result.power.writeOp.dynamic<local_result.power) * pppm_lkg; + unicache.power_t + (unicache.caches.local_result.power) * pppm_lkg; if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { unicache.rt_power = - unicache.rt_power + (unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power) * - pppm_lkg; + unicache.rt_power + + (unicache.missb.local_result.power + unicache.ifb.local_result.power + + unicache.prefetchb.local_result.power + + unicache.wbb.local_result.power) * + pppm_lkg; } rt_power = rt_power + unicache.rt_power; } diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index f12b590..afae99b 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -101,17 +101,18 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, - "icache", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + icache.caches.set_params(&interface_ip, + "icache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.caches.computeArea(); scktRatio = g_tp.sckt_co_eff; chip_PR_overhead = g_tp.chip_layout_overhead; macro_PR_overhead = g_tp.macro_layout_overhead; icache.area.set_area(icache.area.get_area() + - icache.caches->local_result.area); - area.set_area(area.get_area() + icache.caches->local_result.area); + 
icache.caches.local_result.area); + area.set_area(area.get_area() + icache.caches.local_result.area); // output_data_csv(icache.caches.local_result); /* @@ -127,7 +128,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + - icache.caches->l_ip.line_sz * 8; + icache.caches.l_ip.line_sz * 8; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = @@ -157,19 +158,19 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, - "icacheMissBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - icache.area.set_area(icache.area.get_area() + - icache.missb->local_result.area); - area.set_area(area.get_area() + icache.missb->local_result.area); + icache.missb.set_params(&interface_ip, + "icacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.missb.computeArea(); + icache.area.set_area(icache.area.get_area() + icache.missb.local_result.area); + area.set_area(area.get_area() + icache.missb.local_result.area); // output_data_csv(icache.missb.local_result); // fill buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; + data = icache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); @@ -195,20 +196,21 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, - "icacheFillBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - 
icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); - area.set_area(area.get_area() + icache.ifb->local_result.area); + icache.ifb.set_params(&interface_ip, + "icacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.ifb.computeArea(); + icache.area.set_area(icache.area.get_area() + icache.ifb.local_result.area); + area.set_area(area.get_area() + icache.ifb.local_result.area); // output_data_csv(icache.ifb.local_result); // prefetch buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; // check with previous entries to decide wthether to // merge. - data = icache.caches->l_ip + data = icache.caches.l_ip .line_sz; // separate queue to prevent from cache polution. interface_ip.specific_tag = 1; interface_ip.tag_w = tag; @@ -236,14 +238,15 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, - "icacheprefetchBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + icache.prefetchb.set_params(&interface_ip, + "icacheprefetchBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + icache.prefetchb.computeArea(); icache.area.set_area(icache.area.get_area() + - icache.prefetchb->local_result.area); - area.set_area(area.get_area() + icache.prefetchb->local_result.area); + icache.prefetchb.local_result.area); + area.set_area(area.get_area() + icache.prefetchb.local_result.area); // output_data_csv(icache.prefetchb.local_result); // Instruction buffer @@ -404,31 +407,31 @@ void InstFetchU::computeEnergy(bool is_tdp) { return; if (is_tdp) { // init stats for Peak - icache.caches->stats_t.readAc.access = - icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - - icache.caches->stats_t.readAc.miss; - 
icache.caches->tdp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = - icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = - icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.missb->tdp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.ifb->tdp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = - icache.prefetchb->stats_t.readAc.hit = - icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = - icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; + icache.caches.stats_t.readAc.access = + icache.caches.l_ip.num_rw_ports * coredynp.IFU_duty_cycle; + icache.caches.stats_t.readAc.miss = 0; + icache.caches.stats_t.readAc.hit = + icache.caches.stats_t.readAc.access - icache.caches.stats_t.readAc.miss; + icache.caches.tdp_stats = icache.caches.stats_t; + + icache.missb.stats_t.readAc.access = icache.missb.stats_t.readAc.hit = + icache.missb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb.stats_t.writeAc.access = icache.missb.stats_t.writeAc.hit = + icache.missb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.missb.tdp_stats = icache.missb.stats_t; + + icache.ifb.stats_t.readAc.access = icache.ifb.stats_t.readAc.hit = + icache.ifb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb.stats_t.writeAc.access = icache.ifb.stats_t.writeAc.hit = + icache.ifb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.ifb.tdp_stats = 
icache.ifb.stats_t; + + icache.prefetchb.stats_t.readAc.access = + icache.prefetchb.stats_t.readAc.hit = + icache.prefetchb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb.stats_t.writeAc.access = icache.ifb.stats_t.writeAc.hit = + icache.ifb.l_ip.num_search_ports * coredynp.IFU_duty_cycle; + icache.prefetchb.tdp_stats = icache.prefetchb.stats_t; IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; @@ -450,27 +453,25 @@ void InstFetchU::computeEnergy(bool is_tdp) { } else { // init stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = + icache.caches.stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = + icache.caches.stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; + icache.caches.stats_t.readAc.hit = + icache.caches.stats_t.readAc.access - icache.caches.stats_t.readAc.miss; + icache.caches.rtp_stats = icache.caches.stats_t; - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; + icache.missb.stats_t.readAc.access = icache.caches.stats_t.readAc.miss; + icache.missb.stats_t.writeAc.access = icache.caches.stats_t.readAc.miss; + icache.missb.rtp_stats = icache.missb.stats_t; - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; + icache.ifb.stats_t.readAc.access = icache.caches.stats_t.readAc.miss; + icache.ifb.stats_t.writeAc.access = icache.caches.stats_t.readAc.miss; + icache.ifb.rtp_stats = icache.ifb.stats_t; - icache.prefetchb->stats_t.readAc.access = - 
icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = - icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; + icache.prefetchb.stats_t.readAc.access = icache.caches.stats_t.readAc.miss; + icache.prefetchb.stats_t.writeAc.access = icache.caches.stats_t.readAc.miss; + icache.prefetchb.rtp_stats = icache.prefetchb.stats_t; IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; @@ -506,31 +507,31 @@ void InstFetchU::computeEnergy(bool is_tdp) { } icache.power_t.readOp.dynamic += - (icache.caches->stats_t.readAc.hit * - icache.caches->local_result.power.readOp.dynamic + - // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss * - icache.caches->local_result.power.readOp + (icache.caches.stats_t.readAc.hit * + icache.caches.local_result.power.readOp.dynamic + + // icache.caches.stats_t.readAc.miss*icache.caches.local_result.tag_array2->power.readOp.dynamic+ + icache.caches.stats_t.readAc.miss * + icache.caches.local_result.power.readOp .dynamic + // assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss * - icache.caches->local_result.power.writeOp + icache.caches.stats_t.readAc.miss * + icache.caches.local_result.power.writeOp .dynamic); // read miss in Icache cause a write to Icache icache.power_t.readOp.dynamic += - icache.missb->stats_t.readAc.access * - icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access * - icache.missb->local_result.power.writeOp + icache.missb.stats_t.readAc.access * + icache.missb.local_result.power.searchOp.dynamic + + icache.missb.stats_t.writeAc.access * + icache.missb.local_result.power.writeOp .dynamic; // each access to missb involves a CAM and a write icache.power_t.readOp.dynamic += - icache.ifb->stats_t.readAc.access * - icache.ifb->local_result.power.searchOp.dynamic + - 
icache.ifb->stats_t.writeAc.access * - icache.ifb->local_result.power.writeOp.dynamic; + icache.ifb.stats_t.readAc.access * + icache.ifb.local_result.power.searchOp.dynamic + + icache.ifb.stats_t.writeAc.access * + icache.ifb.local_result.power.writeOp.dynamic; icache.power_t.readOp.dynamic += - icache.prefetchb->stats_t.readAc.access * - icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access * - icache.prefetchb->local_result.power.writeOp.dynamic; + icache.prefetchb.stats_t.readAc.access * + icache.prefetchb.local_result.power.searchOp.dynamic + + icache.prefetchb.stats_t.writeAc.access * + icache.prefetchb.local_result.power.writeOp.dynamic; IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + @@ -546,15 +547,15 @@ void InstFetchU::computeEnergy(bool is_tdp) { if (is_tdp) { // icache.power = icache.power_t + - // (icache.caches->local_result.power)*pppm_lkg + - // (icache.missb->local_result.power + - // icache.ifb->local_result.power + - // icache.prefetchb->local_result.power)*pppm_Isub; - icache.power = icache.power_t + (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power) * - pppm_lkg; + // (icache.caches.local_result.power)*pppm_lkg + + // (icache.missb.local_result.power + + // icache.ifb.local_result.power + + // icache.prefetchb.local_result.power)*pppm_Isub; + icache.power = + icache.power_t + + (icache.caches.local_result.power + icache.missb.local_result.power + + icache.ifb.local_result.power + icache.prefetchb.local_result.power) * + pppm_lkg; IB->power = IB->power_t + IB->local_result.power * pppm_lkg; power = power + icache.power + IB->power; @@ -574,16 +575,16 @@ void InstFetchU::computeEnergy(bool is_tdp) { power = power + (ID_inst->power + ID_operand->power + ID_misc->power); } else { // icache.rt_power = icache.power_t + - // 
(icache.caches->local_result.power)*pppm_lkg + - // (icache.missb->local_result.power + - // icache.ifb->local_result.power + - // icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power) * - pppm_lkg; + // (icache.caches.local_result.power)*pppm_lkg + + // (icache.missb.local_result.power + + // icache.ifb.local_result.power + + // icache.prefetchb.local_result.power)*pppm_Isub; + + icache.rt_power = + icache.power_t + + (icache.caches.local_result.power + icache.missb.local_result.power + + icache.ifb.local_result.power + icache.prefetchb.local_result.power) * + pppm_lkg; IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; rt_power = rt_power + icache.rt_power + IB->rt_power; diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc index 6545b95..12e3784 100644 --- a/src/core/loadstore.cc +++ b/src/core/loadstore.cc @@ -105,21 +105,22 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, - "dcache", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + dcache.caches.set_params(&interface_ip, + "dcache", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.caches.computeArea(); dcache.area.set_area(dcache.area.get_area() + - dcache.caches->local_result.area); - area.set_area(area.get_area() + dcache.caches->local_result.area); + dcache.caches.local_result.area); + area.set_area(area.get_area() + dcache.caches.local_result.area); // output_data_csv(dcache.caches.local_result); // dCache controllers // miss buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + - dcache.caches->l_ip.line_sz * 8; + dcache.caches.l_ip.line_sz * 8; 
interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = @@ -145,19 +146,19 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, - "dcacheMissBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.missb->local_result.area); - area.set_area(area.get_area() + dcache.missb->local_result.area); + dcache.missb.set_params(&interface_ip, + "dcacheMissBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.missb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.missb.local_result.area); + area.set_area(area.get_area() + dcache.missb.local_result.area); // output_data_csv(dcache.missb.local_result); // fill buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; + data = dcache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); @@ -181,20 +182,21 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, - "dcacheFillBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + dcache.ifb->local_result.area); - area.set_area(area.get_area() + dcache.ifb->local_result.area); + dcache.ifb.set_params(&interface_ip, + "dcacheFillBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.ifb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.ifb.local_result.area); + area.set_area(area.get_area() + dcache.ifb.local_result.area); // output_data_csv(dcache.ifb.local_result); // prefetch buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; // 
check with previous entries to decide wthether to // merge. - data = dcache.caches->l_ip + data = dcache.caches.l_ip .line_sz; // separate queue to prevent from cache polution. interface_ip.specific_tag = 1; interface_ip.tag_w = tag; @@ -220,21 +222,22 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, - "dcacheprefetchBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + dcache.prefetchb.set_params(&interface_ip, + "dcacheprefetchBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.prefetchb.computeArea(); dcache.area.set_area(dcache.area.get_area() + - dcache.prefetchb->local_result.area); - area.set_area(area.get_area() + dcache.prefetchb->local_result.area); + dcache.prefetchb.local_result.area); + area.set_area(area.get_area() + dcache.prefetchb.local_result.area); // output_data_csv(dcache.prefetchb.local_result); // WBB if (cache_p == Write_back) { tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; + data = dcache.caches.l_ip.line_sz; interface_ip.specific_tag = 1; interface_ip.tag_w = tag; interface_ip.line_sz = data; @@ -258,14 +261,14 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, - "dcacheWBB", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area() + - dcache.wbb->local_result.area); - area.set_area(area.get_area() + dcache.wbb->local_result.area); + dcache.wbb.set_params(&interface_ip, + "dcacheWBB", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + dcache.wbb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.wbb.local_result.area); + area.set_area(area.get_area() + dcache.wbb.local_result.area); // 
output_data_csv(dcache.wbb.local_result); } @@ -354,39 +357,39 @@ void LoadStoreU::computeEnergy(bool is_tdp) { return; if (is_tdp) { // init stats for Peak - dcache.caches->stats_t.readAc.access = - 0.67 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = - 0.33 * dcache.caches->l_ip.num_rw_ports * coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = - dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.missb->stats_t.writeAc.access = - dcache.missb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.ifb->stats_t.writeAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.prefetchb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.prefetchb->stats_t.writeAc.access = - dcache.ifb->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; + dcache.caches.stats_t.readAc.access = + 0.67 * dcache.caches.l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches.stats_t.readAc.miss = 0; + dcache.caches.stats_t.readAc.hit = + dcache.caches.stats_t.readAc.access - dcache.caches.stats_t.readAc.miss; + dcache.caches.stats_t.writeAc.access = + 0.33 * dcache.caches.l_ip.num_rw_ports * coredynp.LSU_duty_cycle; + dcache.caches.stats_t.writeAc.miss = 0; + dcache.caches.stats_t.writeAc.hit = 
dcache.caches.stats_t.writeAc.access - + dcache.caches.stats_t.writeAc.miss; + dcache.caches.tdp_stats = dcache.caches.stats_t; + + dcache.missb.stats_t.readAc.access = + dcache.missb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb.stats_t.writeAc.access = + dcache.missb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.missb.tdp_stats = dcache.missb.stats_t; + + dcache.ifb.stats_t.readAc.access = + dcache.ifb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb.stats_t.writeAc.access = + dcache.ifb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.ifb.tdp_stats = dcache.ifb.stats_t; + + dcache.prefetchb.stats_t.readAc.access = + dcache.prefetchb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb.stats_t.writeAc.access = + dcache.ifb.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + dcache.prefetchb.tdp_stats = dcache.prefetchb.stats_t; if (cache_p == Write_back) { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; + dcache.wbb.stats_t.readAc.access = dcache.wbb.l_ip.num_search_ports; + dcache.wbb.stats_t.writeAc.access = dcache.wbb.l_ip.num_search_ports; + dcache.wbb.tdp_stats = dcache.wbb.stats_t; } LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = @@ -400,53 +403,52 @@ void LoadStoreU::computeEnergy(bool is_tdp) { } } else { // init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = + dcache.caches.stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = + dcache.caches.stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = + dcache.caches.stats_t.readAc.hit = + dcache.caches.stats_t.readAc.access - 
dcache.caches.stats_t.readAc.miss; + dcache.caches.stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = + dcache.caches.stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; + dcache.caches.stats_t.writeAc.hit = dcache.caches.stats_t.writeAc.access - + dcache.caches.stats_t.writeAc.miss; + dcache.caches.rtp_stats = dcache.caches.stats_t; if (cache_p == Write_back) { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = - dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; + dcache.missb.stats_t.readAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.missb.stats_t.writeAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.missb.rtp_stats = dcache.missb.stats_t; + + dcache.ifb.stats_t.readAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.ifb.stats_t.writeAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.ifb.rtp_stats = dcache.ifb.stats_t; + + dcache.prefetchb.stats_t.readAc.access = + dcache.caches.stats_t.writeAc.miss; + dcache.prefetchb.stats_t.writeAc.access = + 
dcache.caches.stats_t.writeAc.miss; + dcache.prefetchb.rtp_stats = dcache.prefetchb.stats_t; + + dcache.wbb.stats_t.readAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.wbb.stats_t.writeAc.access = dcache.caches.stats_t.writeAc.miss; + dcache.wbb.rtp_stats = dcache.wbb.stats_t; } else { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = - dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = - dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; + dcache.missb.stats_t.readAc.access = dcache.caches.stats_t.readAc.miss; + dcache.missb.stats_t.writeAc.access = dcache.caches.stats_t.readAc.miss; + dcache.missb.rtp_stats = dcache.missb.stats_t; + + dcache.ifb.stats_t.readAc.access = dcache.caches.stats_t.readAc.miss; + dcache.ifb.stats_t.writeAc.access = dcache.caches.stats_t.readAc.miss; + dcache.ifb.rtp_stats = dcache.ifb.stats_t; + + dcache.prefetchb.stats_t.readAc.access = + dcache.caches.stats_t.readAc.miss; + dcache.prefetchb.stats_t.writeAc.access = + dcache.caches.stats_t.readAc.miss; + dcache.prefetchb.rtp_stats = dcache.prefetchb.stats_t; } LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + @@ -470,45 +472,45 @@ void LoadStoreU::computeEnergy(bool is_tdp) { dcache.power_t.reset(); LSQ->power_t.reset(); dcache.power_t.readOp.dynamic += - (dcache.caches->stats_t.readAc.hit * - dcache.caches->local_result.power.readOp.dynamic + - dcache.caches->stats_t.readAc.miss * - dcache.caches->local_result.power.readOp + (dcache.caches.stats_t.readAc.hit * + 
dcache.caches.local_result.power.readOp.dynamic + + dcache.caches.stats_t.readAc.miss * + dcache.caches.local_result.power.readOp .dynamic + // assuming D cache is in the fast model which read // tag and data together - dcache.caches->stats_t.writeAc.miss * - dcache.caches->local_result.tag_array2->power.readOp.dynamic + - dcache.caches->stats_t.writeAc.access * - dcache.caches->local_result.power.writeOp.dynamic); + dcache.caches.stats_t.writeAc.miss * + dcache.caches.local_result.tag_array2->power.readOp.dynamic + + dcache.caches.stats_t.writeAc.access * + dcache.caches.local_result.power.writeOp.dynamic); if (cache_p == Write_back) { // write miss will generate a write later dcache.power_t.readOp.dynamic += - dcache.caches->stats_t.writeAc.miss * - dcache.caches->local_result.power.writeOp.dynamic; + dcache.caches.stats_t.writeAc.miss * + dcache.caches.local_result.power.writeOp.dynamic; } dcache.power_t.readOp.dynamic += - dcache.missb->stats_t.readAc.access * - dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access * - dcache.missb->local_result.power.writeOp + dcache.missb.stats_t.readAc.access * + dcache.missb.local_result.power.searchOp.dynamic + + dcache.missb.stats_t.writeAc.access * + dcache.missb.local_result.power.writeOp .dynamic; // each access to missb involves a CAM and a write dcache.power_t.readOp.dynamic += - dcache.ifb->stats_t.readAc.access * - dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access * - dcache.ifb->local_result.power.writeOp.dynamic; + dcache.ifb.stats_t.readAc.access * + dcache.ifb.local_result.power.searchOp.dynamic + + dcache.ifb.stats_t.writeAc.access * + dcache.ifb.local_result.power.writeOp.dynamic; dcache.power_t.readOp.dynamic += - dcache.prefetchb->stats_t.readAc.access * - dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access * - dcache.prefetchb->local_result.power.writeOp.dynamic; + 
dcache.prefetchb.stats_t.readAc.access * + dcache.prefetchb.local_result.power.searchOp.dynamic + + dcache.prefetchb.stats_t.writeAc.access * + dcache.prefetchb.local_result.power.writeOp.dynamic; if (cache_p == Write_back) { dcache.power_t.readOp.dynamic += - dcache.wbb->stats_t.readAc.access * - dcache.wbb->local_result.power.searchOp.dynamic + - dcache.wbb->stats_t.writeAc.access * - dcache.wbb->local_result.power.writeOp.dynamic; + dcache.wbb.stats_t.readAc.access * + dcache.wbb.local_result.power.searchOp.dynamic + + dcache.wbb.stats_t.writeAc.access * + dcache.wbb.local_result.power.writeOp.dynamic; } if ((coredynp.core_ty == OOO) && @@ -543,18 +545,18 @@ void LoadStoreU::computeEnergy(bool is_tdp) { if (is_tdp) { // dcache.power = dcache.power_t + - // (dcache.caches->local_result.power)*pppm_lkg + - // (dcache.missb->local_result.power + - // dcache.ifb->local_result.power + - // dcache.prefetchb->local_result.power + - // dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) * - pppm_lkg; + // (dcache.caches.local_result.power)*pppm_lkg + + // (dcache.missb.local_result.power + + // dcache.ifb.local_result.power + + // dcache.prefetchb.local_result.power + + // dcache.wbb.local_result.power)*pppm_Isub; + dcache.power = + dcache.power_t + + (dcache.caches.local_result.power + dcache.missb.local_result.power + + dcache.ifb.local_result.power + dcache.prefetchb.local_result.power) * + pppm_lkg; if (cache_p == Write_back) { - dcache.power = dcache.power + dcache.wbb->local_result.power * pppm_lkg; + dcache.power = dcache.power + dcache.wbb.local_result.power * pppm_lkg; } LSQ->power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; @@ -567,20 +569,20 @@ void LoadStoreU::computeEnergy(bool is_tdp) { } } else { // dcache.rt_power = dcache.power_t + - // (dcache.caches->local_result.power + 
- // dcache.missb->local_result.power - // + dcache.ifb->local_result.power + - // dcache.prefetchb->local_result.power + - // dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) * - pppm_lkg; + // (dcache.caches.local_result.power + + // dcache.missb.local_result.power + // + dcache.ifb.local_result.power + + // dcache.prefetchb.local_result.power + + // dcache.wbb.local_result.power)*pppm_lkg; + dcache.rt_power = + dcache.power_t + + (dcache.caches.local_result.power + dcache.missb.local_result.power + + dcache.ifb.local_result.power + dcache.prefetchb.local_result.power) * + pppm_lkg; if (cache_p == Write_back) { dcache.rt_power = - dcache.rt_power + dcache.wbb->local_result.power * pppm_lkg; + dcache.rt_power + dcache.wbb.local_result.power * pppm_lkg; } LSQ->rt_power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; diff --git a/src/iocontrollers/flash_controller.cc b/src/iocontrollers/flash_controller.cc index 0753b13..ff4c100 100644 --- a/src/iocontrollers/flash_controller.cc +++ b/src/iocontrollers/flash_controller.cc @@ -104,9 +104,6 @@ void FlashController::computeArea() { // Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from // CAST SerDer_area = 0.36 / 8 * (ip.F_sz_um / 0.065) * (ip.F_sz_um / 0.065); - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; - number_channel = 1 + (fcp.num_channels - 1) * 0.2; area.set_area((ctrl_area + (fcp.withPHY ? 
SerDer_area : 0)) * 1e6 * number_channel); } @@ -262,6 +259,7 @@ void FlashController::set_params(const ParseXML *XML, fcp.num_mcs = XML->sys.flashc.number_mcs; fcp.type = XML->sys.flashc.type; fcp.withPHY = XML->sys.flashc.withPHY; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); long_channel = XML->sys.longer_channel_device; power_gating = XML->sys.power_gating; @@ -278,6 +276,9 @@ void FlashController::set_params(const ParseXML *XML, ip.specific_vcc_min = true; ip.user_defined_vcc_min = XML->sys.flashc.power_gating_vcc; } + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + number_channel = 1 + (fcp.num_channels - 1) * 0.2; init_params = true; } diff --git a/src/main.cc b/src/main.cc index 8ab3e42..8ea5b27 100644 --- a/src/main.cc +++ b/src/main.cc @@ -74,11 +74,11 @@ int main(int argc, char *argv[]) { Processor proc2; p1->parse(opt.input_xml); proc.init(p1); - // save(proc, opt.serialization_name); - // restore(proc2, opt.serialization_name); - // proc2.init(p1, true); - // proc2.displayEnergy(2, opt.print_level); - proc.displayEnergy(2, opt.print_level); + save(proc, opt.serialization_name); + restore(proc2, opt.serialization_name); + proc2.init(p1, true); + proc2.displayEnergy(2, opt.print_level); + // proc.displayEnergy(2, opt.print_level); delete p1; return 0; } diff --git a/src/memoryctrl/memoryctrl.h b/src/memoryctrl/memoryctrl.h index d18b3d4..881ef31 100644 --- a/src/memoryctrl/memoryctrl.h +++ b/src/memoryctrl/memoryctrl.h @@ -84,6 +84,7 @@ class MemoryController : public Component { ar &frontend; ar &transecEngine; ar &PHY; + ar &set_area; Component::serialize(ar, version); } }; diff --git a/src/noc.cc b/src/noc.cc index ea23754..7ff180e 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -397,7 +397,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { ? 
router.crossbar.power.readOp.longer_channel_leakage : router.crossbar.power.readOp.leakage) << " W" << std::endl; - if (power_gating) + if (power_gating) { std::cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel @@ -405,6 +405,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { .power_gated_with_long_channel_leakage : router.crossbar.power.readOp.power_gated_leakage) << " W" << std::endl; + } std::cout << indent_str << indent_str_next << "Gate Leakage = " << router.crossbar.power.readOp.gate_leakage << " W" << std::endl; @@ -423,7 +424,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { ? router.arbiter.power.readOp.longer_channel_leakage : router.arbiter.power.readOp.leakage) << " W" << std::endl; - if (power_gating) + if (power_gating) { std::cout << indent_str << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel @@ -431,6 +432,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { .power_gated_with_long_channel_leakage : router.arbiter.power.readOp.power_gated_leakage) << " W" << std::endl; + } std::cout << indent_str << indent_str_next << "Gate Leakage = " << router.arbiter.power.readOp.gate_leakage << " W" << std::endl; @@ -457,7 +459,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { ? 
link_bus_tot_per_Router.power.readOp.longer_channel_leakage : link_bus_tot_per_Router.power.readOp.leakage) << " W" << std::endl; - if (power_gating) + if (power_gating) { std::cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel @@ -465,6 +467,7 @@ void NoC::display(uint32_t indent, int plevel, bool is_tdp) { .power_gated_with_long_channel_leakage : link_bus_tot_per_Router.power.readOp.power_gated_leakage) << " W" << std::endl; + } std::cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage << " W" << std::endl; @@ -521,10 +524,11 @@ void NoC::set_noc_param(const ParseXML *XML) { assert(nocdynp.chip_coverage <= 1); assert(nocdynp.route_over_perc <= 1); - if (nocdynp.type) + if (nocdynp.type) { name = "NOC"; - else + } else { name = "BUSES"; + } if (XML->sys.NoC[ithNoC].vdd > 0) { interface_ip.specific_hp_vdd = true; diff --git a/src/noc.h b/src/noc.h index 1b975dd..17e4ad5 100644 --- a/src/noc.h +++ b/src/noc.h @@ -107,7 +107,11 @@ class NoC : public Component { ar &link_name; ar &router; ar &link_bus; - Component::serialize(ar, version); + ar &router_exist; + ar &link_bus_exist; + ar &link_bus_tot_per_Router; + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/processor.h b/src/processor.h index 12044a2..1b8e93a 100644 --- a/src/processor.h +++ b/src/processor.h @@ -112,7 +112,7 @@ class Processor : public Component { ar &niu; ar &pcie; ar &flashcontroller; - Component::serialize(ar, version); + // Component::serialize(ar, version); } }; From 1b292db3ac4800ec30649914caa498245dd7931e Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Mon, 22 Jun 2020 14:06:48 -0500 Subject: [PATCH 39/59] Fixed bug --- src/core/exec_unit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index b35c9b2..958f5d0 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -81,7 +81,7 @@ class EXECU : 
public Component { void computeStaticPower(); void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); - ~EXECU(); + ~EXECU(){}; private: bool init_params; From ca89bb4ac19229bdaf6628700604e32ae25d6549 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Mon, 22 Jun 2020 22:46:49 -0500 Subject: [PATCH 40/59] Inst_decoder unit donw --- src/cacti/decoder.cc | 251 +++++++++++- src/cacti/decoder.h | 35 +- src/core/instfetch.cc | 821 +++++++++++++++++++++++++++++++++++++- src/core/instfetch.h | 79 +++- src/logic/inst_decoder.cc | 60 +-- src/logic/inst_decoder.h | 37 +- 6 files changed, 1195 insertions(+), 88 deletions(-) diff --git a/src/cacti/decoder.cc b/src/cacti/decoder.cc index 80957c1..fda3fd9 100644 --- a/src/cacti/decoder.cc +++ b/src/cacti/decoder.cc @@ -54,8 +54,7 @@ Decoder::Decoder(int _num_dec_signals, R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), // power(), fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), - total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), - cell(cell_), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { + total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { w_dec_n[i] = 0; @@ -86,16 +85,73 @@ Decoder::Decoder(int _num_dec_signals, } } - assert(cell.h > 0); - assert(cell.w > 0); + assert(cell_.h > 0); + assert(cell_.w > 0); // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; // area.h = 4 * cell.h; - area.h = g_tp.h_dec * cell.h; - + area.h = g_tp.h_dec * cell_.h; + height = cell_.h; compute_widths(); compute_area(); } + +void Decoder::set_params(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_, + int nodes_DSTN_) { + + exist = 
false; + C_ld_dec_out = _C_ld_dec_out; + + R_wire_dec_out=_R_wire_dec_out; num_gates=0; num_gates_min=2; delay=0; + fully_assoc=fully_assoc_; is_dram=is_dram_; is_wl_tr=is_wl_tr_; + total_driver_nwidth=0; total_driver_pwidth=0; sleeptx=NULL; nodes_DSTN=nodes_DSTN_; power_gating=power_gating_; + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + w_dec_n[i] = 0; + w_dec_p[i] = 0; + } + + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. + */ + int num_addr_bits_dec = _log2(_num_dec_signals); + + if (num_addr_bits_dec < 4) { + if (flag_way_select) { + exist = true; + num_in_signals = 2; + } else { + num_in_signals = 0; + } + } else { + exist = true; + + if (flag_way_select) { + num_in_signals = 3; + } else { + num_in_signals = 2; + } + } + + assert(cell_.h > 0); + assert(cell_.w > 0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + // area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell_.h; + + height = cell_.h; +} + void Decoder::compute_widths() { double F; double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); @@ -128,6 +184,10 @@ void Decoder::compute_widths() { } } +void Decoder::computeArea(){ + compute_widths(); + compute_area(); +} void Decoder::compute_area() { double cumulative_area = 0; double cumulative_curr = 0; // cumulative leakage current @@ -187,7 +247,7 @@ void Decoder::compute_power_gating() { double detalV; double c_wakeup; - c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, cell.h); // Psleep tx + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, height); // Psleep tx detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; // if (g_ip->power_gating) sleeptx = new Sleep_tx(g_ip->perfloss, @@ -311,16 +371,85 @@ PredecBlk::PredecBlk(int num_dec_signals, double R_wire_predec_blk_out_, int num_dec_per_predec, bool is_dram, - bool is_blk1) - : dec(dec_), exist(false), number_input_addr_bits(0), - 
C_ld_predec_blk_out(0), R_wire_predec_blk_out(0), - branch_effort_nand2_gate_output(1), branch_effort_nand3_gate_output(1), - flag_two_unique_paths(false), flag_L2_gate(0), number_inputs_L1_gate(0), - number_gates_L1_nand2_path(0), number_gates_L1_nand3_path(0), - number_gates_L2(0), min_number_gates_L1(2), min_number_gates_L2(2), - num_L1_active_nand2_path(0), num_L1_active_nand3_path(0), - delay_nand2_path(0), delay_nand3_path(0), power_nand2_path(), - power_nand3_path(), power_L2(), is_dram_(is_dram) { + bool is_blk1){ + dec = dec_; + exist= false; number_input_addr_bits= 0; + C_ld_predec_blk_out= 0; R_wire_predec_blk_out= 0; + branch_effort_nand2_gate_output= 1; branch_effort_nand3_gate_output= 1; + flag_two_unique_paths= false; flag_L2_gate= 0; number_inputs_L1_gate= 0; + number_gates_L1_nand2_path= 0; number_gates_L1_nand3_path= 0; + number_gates_L2= 0; min_number_gates_L1= 2; min_number_gates_L2= 2; + num_L1_active_nand2_path= 0; num_L1_active_nand3_path= 0; + delay_nand2_path= 0; delay_nand3_path= 0; is_dram_= is_dram; + + int branch_effort_predec_out; + double C_ld_dec_gate; + int num_addr_bits_dec = _log2(num_dec_signals); + int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; + int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; + + w_L1_nand2_n[0] = 0; + w_L1_nand2_p[0] = 0; + w_L1_nand3_n[0] = 0; + w_L1_nand3_p[0] = 0; + + if (is_blk1 == true) { + if (num_addr_bits_dec <= 0) { + return; + } else if (num_addr_bits_dec < 4) { + // Just one predecoder block is required with NAND2 gates. No decoder + // required. 
The first level of predecoding directly drives the decoder + // output load + exist = true; + number_input_addr_bits = num_addr_bits_dec; + R_wire_predec_blk_out = dec->R_wire_dec_out; + C_ld_predec_blk_out = dec->C_ld_dec_out; + } else { + exist = true; + number_input_addr_bits = blk1_num_input_addr_bits; + branch_effort_predec_out = (1 << blk2_num_input_addr_bits); + C_ld_dec_gate = + num_dec_per_predec * + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = + branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } else { + if (num_addr_bits_dec >= 4) { + exist = true; + number_input_addr_bits = blk2_num_input_addr_bits; + branch_effort_predec_out = (1 << blk1_num_input_addr_bits); + C_ld_dec_gate = + num_dec_per_predec * + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = + branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } + + compute_widths(); + compute_area(); +} + +void PredecBlk::set_params(int num_dec_signals, + Decoder *dec_, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out_, + int num_dec_per_predec, + bool is_dram, + bool is_blk1){ + dec = dec_; + exist= false; number_input_addr_bits= 0; + C_ld_predec_blk_out= 0; R_wire_predec_blk_out= 0; + branch_effort_nand2_gate_output= 1; branch_effort_nand3_gate_output= 1; + flag_two_unique_paths= false; flag_L2_gate= 0; number_inputs_L1_gate= 0; + number_gates_L1_nand2_path= 0; number_gates_L1_nand3_path= 0; + number_gates_L2= 0; min_number_gates_L1= 2; min_number_gates_L2= 2; + num_L1_active_nand2_path= 0; num_L1_active_nand3_path= 0; + delay_nand2_path= 0; delay_nand3_path= 0; is_dram_= is_dram; + int branch_effort_predec_out; double C_ld_dec_gate; int num_addr_bits_dec = _log2(num_dec_signals); @@ -1046,6 +1175,47 @@ void PredecBlk::leakage_feedback(double temperature) { } } 
+void PredecBlkDrv::set_params(int way_select_, PredecBlk *blk_, bool is_dram){ + flag_driver_exists=0; number_gates_nand2_path=0; + number_gates_nand3_path=0; min_number_gates=2; + num_buffers_driving_1_nand2_load=0; num_buffers_driving_2_nand2_load=0; + num_buffers_driving_4_nand2_load=0; num_buffers_driving_2_nand3_load=0; + num_buffers_driving_8_nand3_load=0; num_buffers_nand3_path=0; + c_load_nand2_path_out=0; c_load_nand3_path_out=0; + r_load_nand2_path_out=0; r_load_nand3_path_out=0; delay_nand2_path=0; + delay_nand3_path=0;blk=blk_; + dec=blk->dec; is_dram_=is_dram; way_select=way_select_; + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; + } + + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) { + c_load_nand2_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } else if (dec->num_in_signals == 3) { + c_load_nand3_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } else if (way_select == 0) { + if (blk->exist) { + flag_driver_exists = 1; + } + } + + compute_widths(); + compute_area(); +} + PredecBlkDrv::PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram) : flag_driver_exists(0), number_gates_nand2_path(0), number_gates_nand3_path(0), min_number_gates(2), @@ -1344,6 +1514,53 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) { num_act_mats_hor_dir; } +void Predec::set_params(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_){ + blk1=drv1_->blk; blk2=drv2_->blk; drv1=drv1_; drv2=drv2_; + + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + 
drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + + driver_power.readOp.power_gated_leakage = + drv1->power_nand2_path.readOp.power_gated_leakage + + drv1->power_nand3_path.readOp.power_gated_leakage + + drv2->power_nand2_path.readOp.power_gated_leakage + + drv2->power_nand3_path.readOp.power_gated_leakage; + block_power.readOp.power_gated_leakage = + blk1->power_nand2_path.readOp.power_gated_leakage + + blk1->power_nand3_path.readOp.power_gated_leakage + + blk1->power_L2.readOp.power_gated_leakage + + blk2->power_nand2_path.readOp.power_gated_leakage + + blk2->power_nand3_path.readOp.power_gated_leakage + + blk2->power_L2.readOp.power_gated_leakage; + + power.readOp.leakage = + driver_power.readOp.leakage + block_power.readOp.leakage; + + power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + + block_power.readOp.power_gated_leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; + +} Predec::Predec(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + diff --git 
a/src/cacti/decoder.h b/src/cacti/decoder.h index 80cc86c..ff7193a 100644 --- a/src/cacti/decoder.h +++ b/src/cacti/decoder.h @@ -43,7 +43,19 @@ using namespace std; class Decoder : public Component { public: - Decoder(int _num_dec_signals, + + Decoder(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_ = false, + int nodes_DSTN_ = 1); + Decoder(){}; + void set_params(int _num_dec_signals, bool flag_way_select, double _C_ld_dec_out, double _R_wire_dec_out, @@ -53,7 +65,6 @@ class Decoder : public Component { const Area &cell_, bool power_gating_ = false, int nodes_DSTN_ = 1); - bool exist; int num_in_signals; double C_ld_dec_out; @@ -68,14 +79,16 @@ class Decoder : public Component { bool is_dram; bool is_wl_tr; + double height; double total_driver_nwidth; double total_driver_pwidth; Sleep_tx *sleeptx; - const Area &cell; int nodes_DSTN; bool power_gating; + + void computeArea(); void compute_widths(); void compute_area(); double compute_delays(double inrisetime); // return outrisetime @@ -91,6 +104,7 @@ class Decoder : public Component { class PredecBlk : public Component { public: + PredecBlk(){}; PredecBlk(int num_dec_signals, Decoder *dec, double C_wire_predec_blk_out, @@ -98,6 +112,13 @@ class PredecBlk : public Component { int num_dec_per_predec, bool is_dram_, bool is_blk1); + void set_params(int num_dec_signals, + Decoder *dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); Decoder *dec; bool exist; @@ -142,7 +163,10 @@ class PredecBlk : public Component { class PredecBlkDrv : public Component { public: - PredecBlkDrv(int way_select, PredecBlk *blk_, bool is_dram); + + void set_params(int way_select_, PredecBlk *blk_, bool is_dram); + PredecBlkDrv(){}; + PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram); int flag_driver_exists; int 
number_input_addr_bits; @@ -194,6 +218,9 @@ class PredecBlkDrv : public Component { class Predec : public Component { public: + + Predec(){}; + void set_params(PredecBlkDrv *drv1, PredecBlkDrv *drv2); Predec(PredecBlkDrv *drv1, PredecBlkDrv *drv2); double compute_delays(double inrisetime); // return outrisetime diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index f12b590..7d62a87 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -1,3 +1,821 @@ +// /***************************************************************************** +// * McPAT +// * SOFTWARE LICENSE AGREEMENT +// * Copyright 2012 Hewlett-Packard Development Company, L.P. +// * All Rights Reserved +// * +// * Redistribution and use in source and binary forms, with or without +// * modification, are permitted provided that the following conditions are +// * met: redistributions of source code must retain the above copyright +// * notice, this list of conditions and the following disclaimer; +// * redistributions in binary form must reproduce the above copyright +// * notice, this list of conditions and the following disclaimer in the +// * documentation and/or other materials provided with the distribution; +// * neither the name of the copyright holders nor the names of its +// * contributors may be used to endorse or promote products derived from +// * this software without specific prior written permission. + +// * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” +// * +// ***************************************************************************/ + +// #include "instfetch.h" + +// #include "XML_Parse.h" +// #include "basic_circuit.h" +// #include "const.h" +// #include "io.h" +// #include "parameter.h" + +// #include +// #include +// #include +// #include +// #include + +// InstFetchU::InstFetchU(const ParseXML *XML_interface, +// int ithCore_, +// InputParameter *interface_ip_, +// const CoreDynParam &dyn_p_, +// bool exist_) +// : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), +// coredynp(dyn_p_), IB(0), BTB(0), exist(exist_) { +// if (!exist) +// return; +// int idx, tag, data, size, line, assoc, banks; +// bool debug = false, is_default = true; + +// clockRate = coredynp.clockRate; +// executionTime = coredynp.executionTime; +// cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; +// // Assuming all L1 caches are virtually idxed physically tagged. +// // cache + +// size = (int)XML->sys.core[ithCore].icache.icache_config[0]; +// line = (int)XML->sys.core[ithCore].icache.icache_config[1]; +// assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; +// banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; +// idx = debug ? 9 : int(ceil(log2(size / line / assoc))); +// tag = debug ? 
51 +// : (int)XML->sys.physical_address_width - idx - +// int(ceil(log2(line))) + EXTRA_TAG_BITS; +// interface_ip.specific_tag = 1; +// interface_ip.tag_w = tag; +// interface_ip.cache_sz = +// debug ? 32768 : (int)XML->sys.core[ithCore].icache.icache_config[0]; +// interface_ip.line_sz = +// debug ? 64 : (int)XML->sys.core[ithCore].icache.icache_config[1]; +// interface_ip.assoc = +// debug ? 8 : (int)XML->sys.core[ithCore].icache.icache_config[2]; +// interface_ip.nbanks = +// debug ? 1 : (int)XML->sys.core[ithCore].icache.icache_config[3]; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = +// 0; // debug?0:XML->sys.core[ithCore].icache.icache_config[5]; +// interface_ip.throughput = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; +// interface_ip.latency = +// debug ? 3.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; +// interface_ip.is_cache = true; +// interface_ip.pure_cam = false; +// interface_ip.pure_ram = false; +// // interface_ip.obj_func_dyn_energy = 0; +// // interface_ip.obj_func_dyn_power = 0; +// // interface_ip.obj_func_leak_power = 0; +// // interface_ip.obj_func_cycle_t = 1; +// interface_ip.num_rw_ports = +// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; +// interface_ip.num_rd_ports = 0; +// interface_ip.num_wr_ports = 0; +// interface_ip.num_se_rd_ports = 0; +// icache.caches = new ArrayST(&interface_ip, +// "icache", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// scktRatio = g_tp.sckt_co_eff; +// chip_PR_overhead = g_tp.chip_layout_overhead; +// macro_PR_overhead = g_tp.macro_layout_overhead; +// icache.area.set_area(icache.area.get_area() + +// icache.caches->local_result.area); +// area.set_area(area.get_area() + icache.caches->local_result.area); +// // output_data_csv(icache.caches.local_result); + +// /* +// *iCache controllers +// *miss buffer Each MSHR contains enough state +// *to handle one or more accesses of any type to a single memory line. +// *Due to the generality of the MSHR mechanism, +// *the amount of state involved is non-trivial: +// *including the address, pointers to the cache entry and destination register, +// *written data, and various other pieces of state. +// */ +// interface_ip.num_search_ports = +// debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; +// tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; +// data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + +// icache.caches->l_ip.line_sz * 8; +// interface_ip.specific_tag = 1; +// interface_ip.tag_w = tag; +// interface_ip.line_sz = +// int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); +// interface_ip.cache_sz = +// XML->sys.core[ithCore].icache.buffer_sizes[0] * interface_ip.line_sz; +// interface_ip.assoc = 0; +// interface_ip.nbanks = 1; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = 0; +// interface_ip.throughput = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[4] / +// clockRate; // means cycle time +// interface_ip.latency = debug +// ? 
1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[5] / +// clockRate; // means access time +// interface_ip.obj_func_dyn_energy = 0; +// interface_ip.obj_func_dyn_power = 0; +// interface_ip.obj_func_leak_power = 0; +// interface_ip.obj_func_cycle_t = 1; +// interface_ip.num_rw_ports = +// debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; +// interface_ip.num_rd_ports = 0; +// interface_ip.num_wr_ports = 0; +// interface_ip.num_se_rd_ports = 0; +// interface_ip.num_search_ports = +// XML->sys.core[ithCore].number_instruction_fetch_ports; +// icache.missb = new ArrayST(&interface_ip, +// "icacheMissBuffer", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// icache.area.set_area(icache.area.get_area() + +// icache.missb->local_result.area); +// area.set_area(area.get_area() + icache.missb->local_result.area); +// // output_data_csv(icache.missb.local_result); + +// // fill buffer +// tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; +// data = icache.caches->l_ip.line_sz; +// interface_ip.specific_tag = 1; +// interface_ip.tag_w = tag; +// interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); +// interface_ip.cache_sz = data * XML->sys.core[ithCore].icache.buffer_sizes[1]; +// interface_ip.assoc = 0; +// interface_ip.nbanks = 1; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = 0; +// interface_ip.throughput = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; +// interface_ip.latency = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; +// interface_ip.obj_func_dyn_energy = 0; +// interface_ip.obj_func_dyn_power = 0; +// interface_ip.obj_func_leak_power = 0; +// interface_ip.obj_func_cycle_t = 1; +// interface_ip.num_rw_ports = +// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; +// interface_ip.num_rd_ports = 0; +// interface_ip.num_wr_ports = 0; +// interface_ip.num_se_rd_ports = 0; +// interface_ip.num_search_ports = +// XML->sys.core[ithCore].number_instruction_fetch_ports; +// icache.ifb = new ArrayST(&interface_ip, +// "icacheFillBuffer", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); +// area.set_area(area.get_area() + icache.ifb->local_result.area); +// // output_data_csv(icache.ifb.local_result); + +// // prefetch buffer +// tag = XML->sys.physical_address_width + +// EXTRA_TAG_BITS; // check with previous entries to decide wthether to +// // merge. +// data = icache.caches->l_ip +// .line_sz; // separate queue to prevent from cache polution. +// interface_ip.specific_tag = 1; +// interface_ip.tag_w = tag; +// interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); +// interface_ip.cache_sz = +// XML->sys.core[ithCore].icache.buffer_sizes[2] * interface_ip.line_sz; +// interface_ip.assoc = 0; +// interface_ip.nbanks = 1; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = 0; +// interface_ip.throughput = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; +// interface_ip.latency = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; +// interface_ip.obj_func_dyn_energy = 0; +// interface_ip.obj_func_dyn_power = 0; +// interface_ip.obj_func_leak_power = 0; +// interface_ip.obj_func_cycle_t = 1; +// interface_ip.num_rw_ports = +// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; +// interface_ip.num_rd_ports = 0; +// interface_ip.num_wr_ports = 0; +// interface_ip.num_se_rd_ports = 0; +// interface_ip.num_search_ports = +// XML->sys.core[ithCore].number_instruction_fetch_ports; +// icache.prefetchb = new ArrayST(&interface_ip, +// "icacheprefetchBuffer", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// icache.area.set_area(icache.area.get_area() + +// icache.prefetchb->local_result.area); +// area.set_area(area.get_area() + icache.prefetchb->local_result.area); +// // output_data_csv(icache.prefetchb.local_result); + +// // Instruction buffer +// data = +// XML->sys.core[ithCore].instruction_length * +// XML->sys.core[ithCore] +// .peak_issue_width; // icache.caches.l_ip.line_sz; //multiple +// // threads timing sharing the instruction buffer. +// interface_ip.is_cache = false; +// interface_ip.pure_ram = true; +// interface_ip.pure_cam = false; +// interface_ip.line_sz = int(ceil(data / 8.0)); +// interface_ip.cache_sz = +// XML->sys.core[ithCore].number_hardware_threads * +// XML->sys.core[ithCore].instruction_buffer_size * +// interface_ip.line_sz > +// 64 +// ? XML->sys.core[ithCore].number_hardware_threads * +// XML->sys.core[ithCore].instruction_buffer_size * +// interface_ip.line_sz +// : 64; +// interface_ip.assoc = 1; +// interface_ip.nbanks = 1; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = 0; +// interface_ip.throughput = 1.0 / clockRate; +// interface_ip.latency = 1.0 / clockRate; +// interface_ip.obj_func_dyn_energy = 0; +// interface_ip.obj_func_dyn_power = 0; +// interface_ip.obj_func_leak_power = 0; +// interface_ip.obj_func_cycle_t = 1; +// // NOTE: Assuming IB is time slice shared among threads, every fetch op will +// // at least fetch "fetch width" instructions. +// interface_ip.num_rw_ports = +// debug +// ? 
1 +// : XML->sys.core[ithCore] +// .number_instruction_fetch_ports; // XML->sys.core[ithCore].fetch_width; +// interface_ip.num_rd_ports = 0; +// interface_ip.num_wr_ports = 0; +// interface_ip.num_se_rd_ports = 0; +// IB = new ArrayST(&interface_ip, +// "InstBuffer", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// IB->area.set_area(IB->area.get_area() + IB->local_result.area); +// area.set_area(area.get_area() + IB->local_result.area); +// // output_data_csv(IB.IB.local_result); + +// // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; +// // inst_decoder.init_decoder(is_default, &interface_ip); +// // inst_decoder.full_decoder_power(); + +// if (coredynp.predictionW > 0) { +// /* +// * BTB branch target buffer, accessed during IF stage. Virtually indexed and +// * virtually tagged It is only a cache without all the buffers in the cache +// * controller since it is more like a look up table than a cache with cache +// * controller. When access miss, no load from other places such as main +// * memory (not actively fill the misses), it is passively updated under two +// * circumstances: 1) when BPT@ID stage finds out current is a taken branch +// * while BTB missed 2) When BPT@ID stage predicts differently than BTB 3) +// * When ID stage finds out current instruction is not a branch while BTB had +// * a hit.(mark as invalid) 4) when EXEU find out wrong target has been +// * provided from BTB. +// * +// */ +// size = XML->sys.core[ithCore].BTB.BTB_config[0]; +// line = XML->sys.core[ithCore].BTB.BTB_config[1]; +// assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; +// banks = XML->sys.core[ithCore].BTB.BTB_config[3]; +// idx = debug ? 9 : int(ceil(log2(size / line / assoc))); +// // tag = +// // debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + +// // int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +// // +EXTRA_TAG_BITS; +// tag = debug ? 
51 +// : XML->sys.virtual_address_width + +// int(ceil(log2( +// XML->sys.core[ithCore].number_hardware_threads))) + +// EXTRA_TAG_BITS; +// interface_ip.is_cache = true; +// interface_ip.pure_ram = false; +// interface_ip.pure_cam = false; +// interface_ip.specific_tag = 1; +// interface_ip.tag_w = tag; +// interface_ip.cache_sz = debug ? 32768 : size; +// interface_ip.line_sz = debug ? 64 : line; +// interface_ip.assoc = debug ? 8 : assoc; +// interface_ip.nbanks = debug ? 1 : banks; +// interface_ip.out_w = interface_ip.line_sz * 8; +// interface_ip.access_mode = +// 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; +// interface_ip.throughput = +// debug ? 1.0 / clockRate +// : XML->sys.core[ithCore].BTB.BTB_config[4] / clockRate; +// interface_ip.latency = +// debug ? 3.0 / clockRate +// : XML->sys.core[ithCore].BTB.BTB_config[5] / clockRate; +// interface_ip.obj_func_dyn_energy = 0; +// interface_ip.obj_func_dyn_power = 0; +// interface_ip.obj_func_leak_power = 0; +// interface_ip.obj_func_cycle_t = 1; +// interface_ip.num_rw_ports = 1; +// interface_ip.num_rd_ports = coredynp.predictionW; +// interface_ip.num_wr_ports = coredynp.predictionW; +// interface_ip.num_se_rd_ports = 0; +// BTB = new ArrayST(&interface_ip, +// "Branch Target Buffer", +// Core_device, +// coredynp.opt_local, +// coredynp.core_ty); +// BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); +// area.set_area(area.get_area() + BTB->local_result.area); +// /// cout<<"area="<set_params(XML, ithCore, &interface_ip, coredynp); +// BPT->computeArea(); +// BPT->set_stats(XML); +// area.set_area(area.get_area() + BPT->area.get_area()); +// } + +// ID_inst.set_params(is_default, +// &interface_ip, +// coredynp.opcode_length, +// 1 /*Decoder should not know how many by itself*/, +// coredynp.x86, +// Core_device, +// coredynp.core_ty); + +// ID_operand.set_params(is_default, +// &interface_ip, +// coredynp.arch_ireg_width, +// 1, +// coredynp.x86, +// Core_device, +// 
coredynp.core_ty); + +// ID_misc.set_params(is_default, +// &interface_ip, +// 8 /* Prefix field etc upto 14B*/, +// 1, +// coredynp.x86, +// Core_device, +// coredynp.core_ty); +// ID_inst.computeArea(); +// ID_inst.computeDynamicPower(); +// ID_operand.computeArea(); +// ID_operand.computeDynamicPower(); +// ID_misc.computeArea(); +// ID_misc.computeDynamicPower(); +// // TODO: X86 decoder should decode the inst in cyclic mode under the control +// // of squencer. So the dynamic power should be multiplied by a few times. +// area.set_area(area.get_area() + +// (ID_inst.area.get_area() + ID_operand.area.get_area() + +// ID_misc.area.get_area()) * +// coredynp.decodeW); +// } + +// void InstFetchU::computeEnergy(bool is_tdp) { +// if (!exist) +// return; +// if (is_tdp) { +// // init stats for Peak +// icache.caches->stats_t.readAc.access = +// icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; +// icache.caches->stats_t.readAc.miss = 0; +// icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - +// icache.caches->stats_t.readAc.miss; +// icache.caches->tdp_stats = icache.caches->stats_t; + +// icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = +// icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = +// icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// icache.missb->tdp_stats = icache.missb->stats_t; + +// icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = +// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = +// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// icache.ifb->tdp_stats = icache.ifb->stats_t; + +// icache.prefetchb->stats_t.readAc.access = +// icache.prefetchb->stats_t.readAc.hit = +// icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// 
icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = +// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; +// icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; + +// IB->stats_t.readAc.access = IB->stats_t.writeAc.access = +// XML->sys.core[ithCore].peak_issue_width; +// IB->tdp_stats = IB->stats_t; + +// if (coredynp.predictionW > 0) { +// BTB->stats_t.readAc.access = +// coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; +// BTB->stats_t.writeAc.access = +// 0; // XML->sys.core[ithCore].BTB.write_accesses; +// } + +// ID_inst.stats_t.readAc.access = coredynp.decodeW; +// ID_operand.stats_t.readAc.access = coredynp.decodeW; +// ID_misc.stats_t.readAc.access = coredynp.decodeW; +// ID_inst.tdp_stats = ID_inst.stats_t; +// ID_operand.tdp_stats = ID_operand.stats_t; +// ID_misc.tdp_stats = ID_misc.stats_t; + +// } else { +// // init stats for Runtime Dynamic (RTP) +// icache.caches->stats_t.readAc.access = +// XML->sys.core[ithCore].icache.read_accesses; +// icache.caches->stats_t.readAc.miss = +// XML->sys.core[ithCore].icache.read_misses; +// icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - +// icache.caches->stats_t.readAc.miss; +// icache.caches->rtp_stats = icache.caches->stats_t; + +// icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; +// icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; +// icache.missb->rtp_stats = icache.missb->stats_t; + +// icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; +// icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; +// icache.ifb->rtp_stats = icache.ifb->stats_t; + +// icache.prefetchb->stats_t.readAc.access = +// icache.caches->stats_t.readAc.miss; +// icache.prefetchb->stats_t.writeAc.access = +// icache.caches->stats_t.readAc.miss; +// icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; + +// IB->stats_t.readAc.access = 
IB->stats_t.writeAc.access = +// XML->sys.core[ithCore].total_instructions; +// IB->rtp_stats = IB->stats_t; + +// if (coredynp.predictionW > 0) { +// BTB->stats_t.readAc.access = +// XML->sys.core[ithCore] +// .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; +// BTB->stats_t.writeAc.access = +// XML->sys.core[ithCore] +// .BTB +// .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; +// BTB->rtp_stats = BTB->stats_t; +// } + +// ID_inst.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; +// ID_operand.stats_t.readAc.access = +// XML->sys.core[ithCore].total_instructions; +// ID_misc.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; +// ID_inst.rtp_stats = ID_inst.stats_t; +// ID_operand.rtp_stats = ID_operand.stats_t; +// ID_misc.rtp_stats = ID_misc.stats_t; +// } + +// icache.power_t.reset(); +// IB->power_t.reset(); +// // ID_inst.power_t.reset(); +// // ID_operand.power_t.reset(); +// // ID_misc.power_t.reset(); +// if (coredynp.predictionW > 0) { +// BTB->power_t.reset(); +// } + +// icache.power_t.readOp.dynamic += +// (icache.caches->stats_t.readAc.hit * +// icache.caches->local_result.power.readOp.dynamic + +// // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ +// icache.caches->stats_t.readAc.miss * +// icache.caches->local_result.power.readOp +// .dynamic + // assume tag data accessed in parallel +// icache.caches->stats_t.readAc.miss * +// icache.caches->local_result.power.writeOp +// .dynamic); // read miss in Icache cause a write to Icache +// icache.power_t.readOp.dynamic += +// icache.missb->stats_t.readAc.access * +// icache.missb->local_result.power.searchOp.dynamic + +// icache.missb->stats_t.writeAc.access * +// icache.missb->local_result.power.writeOp +// .dynamic; // each access to missb involves a CAM and a write +// icache.power_t.readOp.dynamic += +// icache.ifb->stats_t.readAc.access * +// 
icache.ifb->local_result.power.searchOp.dynamic + +// icache.ifb->stats_t.writeAc.access * +// icache.ifb->local_result.power.writeOp.dynamic; +// icache.power_t.readOp.dynamic += +// icache.prefetchb->stats_t.readAc.access * +// icache.prefetchb->local_result.power.searchOp.dynamic + +// icache.prefetchb->stats_t.writeAc.access * +// icache.prefetchb->local_result.power.writeOp.dynamic; + +// IB->power_t.readOp.dynamic += +// IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + +// IB->stats_t.writeAc.access * IB->local_result.power.writeOp.dynamic; + +// if (coredynp.predictionW > 0) { +// BTB->power_t.readOp.dynamic += +// BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + +// BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; + +// BPT->computeDynamicPower(is_tdp); +// } + +// if (is_tdp) { +// // icache.power = icache.power_t + +// // (icache.caches->local_result.power)*pppm_lkg + +// // (icache.missb->local_result.power + +// // icache.ifb->local_result.power + +// // icache.prefetchb->local_result.power)*pppm_Isub; +// icache.power = icache.power_t + (icache.caches->local_result.power + +// icache.missb->local_result.power + +// icache.ifb->local_result.power + +// icache.prefetchb->local_result.power) * +// pppm_lkg; + +// IB->power = IB->power_t + IB->local_result.power * pppm_lkg; +// power = power + icache.power + IB->power; +// if (coredynp.predictionW > 0) { +// BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; +// power = power + BTB->power + BPT->power; +// } + +// ID_inst.power_t.readOp.dynamic = ID_inst.power.readOp.dynamic; +// ID_operand.power_t.readOp.dynamic = ID_operand.power.readOp.dynamic; +// ID_misc.power_t.readOp.dynamic = ID_misc.power.readOp.dynamic; + +// ID_inst.power.readOp.dynamic *= ID_inst.tdp_stats.readAc.access; +// ID_operand.power.readOp.dynamic *= ID_operand.tdp_stats.readAc.access; +// ID_misc.power.readOp.dynamic *= ID_misc.tdp_stats.readAc.access; + +// 
power = power + (ID_inst.power + ID_operand.power + ID_misc.power); +// } else { +// // icache.rt_power = icache.power_t + +// // (icache.caches->local_result.power)*pppm_lkg + +// // (icache.missb->local_result.power + +// // icache.ifb->local_result.power + +// // icache.prefetchb->local_result.power)*pppm_Isub; + +// icache.rt_power = icache.power_t + (icache.caches->local_result.power + +// icache.missb->local_result.power + +// icache.ifb->local_result.power + +// icache.prefetchb->local_result.power) * +// pppm_lkg; + +// IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; +// rt_power = rt_power + icache.rt_power + IB->rt_power; +// if (coredynp.predictionW > 0) { +// BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; +// rt_power = rt_power + BTB->rt_power + BPT->rt_power; +// } + +// ID_inst.rt_power.readOp.dynamic = +// ID_inst.power_t.readOp.dynamic * ID_inst.rtp_stats.readAc.access; +// ID_operand.rt_power.readOp.dynamic = ID_operand.power_t.readOp.dynamic * +// ID_operand.rtp_stats.readAc.access; +// ID_misc.rt_power.readOp.dynamic = +// ID_misc.power_t.readOp.dynamic * ID_misc.rtp_stats.readAc.access; + +// rt_power = rt_power + +// (ID_inst.rt_power + ID_operand.rt_power + ID_misc.rt_power); +// } +// } + +// void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +// if (!exist) +// return; +// string indent_str(indent, ' '); +// string indent_str_next(indent + 2, ' '); +// bool long_channel = XML->sys.longer_channel_device; +// bool power_gating = XML->sys.power_gating; + +// if (is_tdp) { + +// cout << indent_str << "Instruction Cache:" << endl; +// cout << indent_str_next << "Area = " << icache.area.get_area() * 1e-6 +// << " mm^2" << endl; +// cout << indent_str_next +// << "Peak Dynamic = " << icache.power.readOp.dynamic * clockRate << " W" +// << endl; +// cout << indent_str_next << "Subthreshold Leakage = " +// << (long_channel ? 
icache.power.readOp.longer_channel_leakage +// : icache.power.readOp.leakage) +// << " W" << endl; +// if (power_gating) +// cout << indent_str_next << "Subthreshold Leakage with power gating = " +// << (long_channel +// ? icache.power.readOp.power_gated_with_long_channel_leakage +// : icache.power.readOp.power_gated_leakage) +// << " W" << endl; +// cout << indent_str_next +// << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" +// << endl; +// cout << indent_str_next << "Runtime Dynamic = " +// << icache.rt_power.readOp.dynamic / executionTime << " W" << endl; +// cout << endl; +// if (coredynp.predictionW > 0) { +// cout << indent_str << "Branch Target Buffer:" << endl; +// cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 +// << " mm^2" << endl; +// cout << indent_str_next +// << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" +// << endl; +// cout << indent_str_next << "Subthreshold Leakage = " +// << (long_channel ? BTB->power.readOp.longer_channel_leakage +// : BTB->power.readOp.leakage) +// << " W" << endl; +// if (power_gating) +// cout << indent_str_next << "Subthreshold Leakage with power gating = " +// << (long_channel +// ? BTB->power.readOp.power_gated_with_long_channel_leakage +// : BTB->power.readOp.power_gated_leakage) +// << " W" << endl; +// cout << indent_str_next +// << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" +// << endl; +// cout << indent_str_next << "Runtime Dynamic = " +// << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; +// cout << endl; +// if (BPT->exist) { +// cout << indent_str << "Branch Predictor:" << endl; +// cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 +// << " mm^2" << endl; +// cout << indent_str_next +// << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate +// << " W" << endl; +// cout << indent_str_next << "Subthreshold Leakage = " +// << (long_channel ? 
BPT->power.readOp.longer_channel_leakage +// : BPT->power.readOp.leakage) +// << " W" << endl; +// if (power_gating) +// cout << indent_str_next << "Subthreshold Leakage with power gating = " +// << (long_channel +// ? BPT->power.readOp.power_gated_with_long_channel_leakage +// : BPT->power.readOp.power_gated_leakage) +// << " W" << endl; +// cout << indent_str_next +// << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" +// << endl; +// cout << indent_str_next << "Runtime Dynamic = " +// << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; +// cout << endl; +// if (plevel > 3) { +// BPT->displayEnergy(indent + 4, plevel, is_tdp); +// } +// } +// } +// cout << indent_str << "Instruction Buffer:" << endl; +// cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 +// << " mm^2" << endl; +// cout << indent_str_next +// << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" +// << endl; +// cout << indent_str_next << "Subthreshold Leakage = " +// << (long_channel ? IB->power.readOp.longer_channel_leakage +// : IB->power.readOp.leakage) +// << " W" << endl; +// if (power_gating) +// cout << indent_str_next << "Subthreshold Leakage with power gating = " +// << (long_channel +// ? 
IB->power.readOp.power_gated_with_long_channel_leakage +// : IB->power.readOp.power_gated_leakage) +// << " W" << endl; +// cout << indent_str_next +// << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; +// cout << indent_str_next +// << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime +// << " W" << endl; +// cout << endl; +// cout << indent_str << "Instruction Decoder:" << endl; +// cout << indent_str_next << "Area = " +// << (ID_inst.area.get_area() + ID_operand.area.get_area() + +// ID_misc.area.get_area()) * +// coredynp.decodeW * 1e-6 +// << " mm^2" << endl; +// cout << indent_str_next << "Peak Dynamic = " +// << (ID_inst.power.readOp.dynamic + ID_operand.power.readOp.dynamic + +// ID_misc.power.readOp.dynamic) * +// clockRate +// << " W" << endl; +// cout << indent_str_next << "Subthreshold Leakage = " +// << (long_channel ? (ID_inst.power.readOp.longer_channel_leakage + +// ID_operand.power.readOp.longer_channel_leakage + +// ID_misc.power.readOp.longer_channel_leakage) +// : (ID_inst.power.readOp.leakage + +// ID_operand.power.readOp.leakage + +// ID_misc.power.readOp.leakage)) +// << " W" << endl; + +// double tot_leakage = +// (ID_inst.power.readOp.leakage + ID_operand.power.readOp.leakage + +// ID_misc.power.readOp.leakage); +// double tot_leakage_longchannel = +// (ID_inst.power.readOp.longer_channel_leakage + +// ID_operand.power.readOp.longer_channel_leakage + +// ID_misc.power.readOp.longer_channel_leakage); +// double tot_leakage_pg = (ID_inst.power.readOp.power_gated_leakage + +// ID_operand.power.readOp.power_gated_leakage + +// ID_misc.power.readOp.power_gated_leakage); +// double tot_leakage_pg_with_long_channel = +// (ID_inst.power.readOp.power_gated_with_long_channel_leakage + +// ID_operand.power.readOp.power_gated_with_long_channel_leakage + +// ID_misc.power.readOp.power_gated_with_long_channel_leakage); + +// if (power_gating) +// cout << indent_str_next << "Subthreshold Leakage with power gating = " 
+// << (long_channel ? tot_leakage_pg_with_long_channel : tot_leakage_pg) +// << " W" << endl; +// cout << indent_str_next << "Gate Leakage = " +// << (ID_inst.power.readOp.gate_leakage + +// ID_operand.power.readOp.gate_leakage + +// ID_misc.power.readOp.gate_leakage) +// << " W" << endl; +// cout << indent_str_next << "Runtime Dynamic = " +// << (ID_inst.rt_power.readOp.dynamic + +// ID_operand.rt_power.readOp.dynamic + +// ID_misc.rt_power.readOp.dynamic) / +// executionTime +// << " W" << endl; +// cout << endl; +// } else { +// // cout << indent_str_next << "Instruction Cache Peak Dynamic = " +// //<< icache.rt_power.readOp.dynamic*clockRate << " W" << endl; +// // cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " +// // << icache.rt_power.readOp.leakage <<" W" << endl; cout << +// // indent_str_next << "Instruction Cache Gate Leakage = " << +// // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << +// // indent_str_next << "Instruction Buffer Peak Dynamic = " << +// // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << +// // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << +// // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next +// // << "Instruction Buffer Gate Leakage = " << +// // IB->rt_power.readOp.gate_leakage +// //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer +// // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << +// // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold +// // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout +// // << indent_str_next << "Branch Target Buffer Gate Leakage = " << +// // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << +// // indent_str_next << "Branch Predictor Peak Dynamic = " << +// // BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout +// // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << +// // 
BPT->rt_power.readOp.leakage << " W" << endl; cout << +// // indent_str_next +// // << "Branch Predictor Gate Leakage = " << +// // BPT->rt_power.readOp.gate_leakage +// //<< " W" << endl; +// } +// } + +// InstFetchU ::~InstFetchU() { + +// if (!exist) +// return; +// if (IB) { +// delete IB; +// IB = 0; +// } +// if (coredynp.predictionW > 0) { +// if (BTB) { +// delete BTB; +// BTB = 0; +// } +// if (BPT) { +// delete BPT; +// BPT = 0; +// } +// } +// } + + /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT @@ -14,7 +832,6 @@ * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -820,4 +1637,4 @@ InstFetchU ::~InstFetchU() { BPT = 0; } } -} +} \ No newline at end of file diff --git a/src/core/instfetch.h b/src/core/instfetch.h index 110f148..b1606ce 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -1,3 +1,79 @@ +// /***************************************************************************** +// * McPAT +// * SOFTWARE LICENSE AGREEMENT +// * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+// * All Rights Reserved +// * +// * Redistribution and use in source and binary forms, with or without +// * modification, are permitted provided that the following conditions are +// * met: redistributions of source code must retain the above copyright +// * notice, this list of conditions and the following disclaimer; +// * redistributions in binary form must reproduce the above copyright +// * notice, this list of conditions and the following disclaimer in the +// * documentation and/or other materials provided with the distribution; +// * neither the name of the copyright holders nor the names of its +// * contributors may be used to endorse or promote products derived from +// * this software without specific prior written permission. + +// * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” +// * +// ***************************************************************************/ + +// #ifndef __INST_FETCH_U_H__ +// #define __INST_FETCH_U_H__ + +// #include "XML_Parse.h" +// #include "array.h" +// #include "basic_components.h" +// #include "branch_predictor.h" +// #include "inst_decoder.h" +// #include "instcache.h" +// #include "interconnect.h" +// #include "parameter.h" + +// class InstFetchU : public Component { +// public: +// const ParseXML *XML; +// int ithCore; +// InputParameter interface_ip; +// CoreDynParam coredynp; +// double clockRate; +// double executionTime; +// double scktRatio; +// double chip_PR_overhead; +// double macro_PR_overhead; +// enum Cache_policy cache_p; +// InstCache icache; +// ArrayST *IB; +// ArrayST *BTB; +// BranchPredictor *BPT; +// inst_decoder ID_inst; +// inst_decoder ID_operand; +// inst_decoder ID_misc; +// bool exist; + +// InstFetchU(const ParseXML *XML_interface, +// int ithCore_, +// InputParameter *interface_ip_, +// const CoreDynParam &dyn_p_, +// bool exsit = true); +// void computeEnergy(bool is_tdp = true); +// void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); +// ~InstFetchU(); +// }; + +// #endif // __INST_FETCH_U_H__ + /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT @@ -14,7 +90,6 @@ * neither the name of the copyright holders nor the names of its * 
contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -72,4 +147,4 @@ class InstFetchU : public Component { ~InstFetchU(); }; -#endif // __INST_FETCH_U_H__ +#endif // __INST_FETCH_U_H__ \ No newline at end of file diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index a7472df..f0f876d 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -1,3 +1,4 @@ + /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT @@ -14,7 +15,6 @@ * neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -88,7 +88,7 @@ inst_decoder::inst_decoder(bool _is_default, is_dram); // TODO: this number 1024 needs to be revisited R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; - final_dec = new Decoder(num_decoded_signals, + final_dec.set_params(num_decoded_signals, false, C_driver_load, R_wire_load, @@ -96,35 +96,37 @@ inst_decoder::inst_decoder(bool _is_default, false /*is_dram*/, false /*wl_tr*/, // to use peri device cell); - - PredecBlk *predec_blk1 = - new PredecBlk(num_decoded_signals, - final_dec, + final_dec.computeArea(); + PredecBlk predec_blk1; + predec_blk1.set_params(num_decoded_signals, + &final_dec, 0, // Assuming predec and dec are back to back 0, 1, // Each Predec only drives one final dec false /*is_dram*/, true); - PredecBlk *predec_blk2 = - new PredecBlk(num_decoded_signals, - final_dec, + PredecBlk predec_blk2; + predec_blk2.set_params(num_decoded_signals, + &final_dec, 0, // Assuming predec and dec are back to back 0, 1, // Each Predec only drives one final dec false /*is_dram*/, false); - PredecBlkDrv *predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); - PredecBlkDrv *predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); + PredecBlkDrv predec_blk_drv1; + predec_blk_drv1.set_params(0, &predec_blk1, false); + PredecBlkDrv predec_blk_drv2; + predec_blk_drv2.set_params(0, &predec_blk2, false); - pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); + pre_dec.set_params(&predec_blk_drv1, &predec_blk_drv2); - double area_decoder = final_dec->area.get_area() * num_decoded_signals * + double area_decoder = final_dec.area.get_area() * num_decoded_signals * num_decoder_segments * num_decoders; // double w_decoder = area_decoder / area.get_h(); double area_pre_dec = - (predec_blk_drv1->area.get_area() + 
predec_blk_drv2->area.get_area() + - predec_blk1->area.get_area() + predec_blk2->area.get_area()) * + (predec_blk_drv1.area.get_area() + predec_blk_drv2.area.get_area() + + predec_blk1.area.get_area() + predec_blk2.area.get_area()) * num_decoder_segments * num_decoders; area.set_area(area.get_area() + area_decoder + area_pre_dec); double macro_layout_overhead = g_tp.macro_layout_overhead; @@ -156,28 +158,28 @@ void inst_decoder::inst_decoder_delay_power() { double pppm_t[4] = {1, 1, 1, 1}; double squencer_passes = x86 ? 2 : 1; - outrisetime = pre_dec->compute_delays(inrisetime); - dec_outrisetime = final_dec->compute_delays(outrisetime); + outrisetime = pre_dec.compute_delays(inrisetime); + dec_outrisetime = final_dec.compute_delays(outrisetime); set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments, squencer_passes * num_decoder_segments, num_decoder_segments); - power = power + pre_dec->power * pppm_t; + power = power + pre_dec.power * pppm_t; set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments * num_decoded_signals, num_decoder_segments * num_decoded_signals, squencer_passes * num_decoder_segments); - power = power + final_dec->power * pppm_t; + power = power + final_dec.power * pppm_t; } void inst_decoder::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature / 10.0) * 10; uca_org_t init_result = init_interface(&l_ip); // init_result is dummy - final_dec->leakage_feedback(temperature); - pre_dec->leakage_feedback(temperature); + final_dec.leakage_feedback(temperature); + pre_dec.leakage_feedback(temperature); double pppm_t[4] = {1, 1, 1, 1}; double squencer_passes = x86 ? 
2 : 1; @@ -187,14 +189,14 @@ void inst_decoder::leakage_feedback(double temperature) { num_decoder_segments, squencer_passes * num_decoder_segments, num_decoder_segments); - power = pre_dec->power * pppm_t; + power = pre_dec.power * pppm_t; set_pppm(pppm_t, squencer_passes * num_decoder_segments, num_decoder_segments * num_decoded_signals, num_decoder_segments * num_decoded_signals, squencer_passes * num_decoder_segments); - power = power + final_dec->power * pppm_t; + power = power + final_dec.power * pppm_t; double sckRation = g_tp.sckt_co_eff; @@ -216,11 +218,9 @@ void inst_decoder::leakage_feedback(double temperature) { inst_decoder::~inst_decoder() { local_result.cleanup(); - delete final_dec; - delete pre_dec->blk1; - delete pre_dec->blk2; - delete pre_dec->drv1; - delete pre_dec->drv2; - delete pre_dec; -} + delete pre_dec.blk1; + delete pre_dec.blk2; + delete pre_dec.drv1; + delete pre_dec.drv2; +} \ No newline at end of file diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index e65a07f..b38360b 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -1,33 +1,3 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ #ifndef __INST_DECODER_H__ #define __INST_DECODER_H__ @@ -49,6 +19,7 @@ class inst_decoder : public Component { public: + inst_decoder(bool _is_default, const InputParameter *configure_interface, int opcode_length_, @@ -68,8 +39,8 @@ class inst_decoder : public Component { enum Device_ty device_ty; enum Core_type core_ty; - Decoder *final_dec; - Predec *pre_dec; + Decoder final_dec; 
+ Predec pre_dec; statsDef tdp_stats; statsDef rtp_stats; @@ -80,4 +51,4 @@ class inst_decoder : public Component { void leakage_feedback(double temperature); }; -#endif //__INST_DECODER_H__ +#endif //__INST_DECODER_H__ \ No newline at end of file From 3269b096bee295224dc8a7462c6f83d1a1632709 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Mon, 22 Jun 2020 23:41:24 -0500 Subject: [PATCH 41/59] Done with the ID units --- src/core/instfetch.cc | 145 +++++++++++++++++++------------------- src/core/instfetch.h | 6 +- src/logic/inst_decoder.cc | 24 ++++--- src/logic/inst_decoder.h | 13 +++- 4 files changed, 99 insertions(+), 89 deletions(-) diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 7d62a87..3fac327 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -866,7 +866,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), IB(0), BTB(0), ID_inst(0), ID_operand(0), ID_misc(0), + coredynp(dyn_p_), IB(0), BTB(0), exist(exist_) { if (!exist) return; @@ -1185,7 +1185,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, area.set_area(area.get_area() + BPT->area.get_area()); } - ID_inst = new inst_decoder(is_default, + ID_inst.set_params(is_default, &interface_ip, coredynp.opcode_length, 1 /*Decoder should not know how many by itself*/, @@ -1193,7 +1193,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.core_ty); - ID_operand = new inst_decoder(is_default, + ID_operand.set_params(is_default, &interface_ip, coredynp.arch_ireg_width, 1, @@ -1201,18 +1201,26 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.core_ty); - ID_misc = new inst_decoder(is_default, + ID_misc.set_params(is_default, &interface_ip, 8 /* Prefix field etc upto 14B*/, 1, coredynp.x86, Core_device, coredynp.core_ty); + + ID_misc.computeArea(); + ID_misc.computeDynamicPower(); 
+ ID_operand.computeArea(); + ID_operand.computeDynamicPower(); + ID_inst.computeArea(); + ID_inst.computeDynamicPower(); + // TODO: X86 decoder should decode the inst in cyclic mode under the control // of squencer. So the dynamic power should be multiplied by a few times. area.set_area(area.get_area() + - (ID_inst->area.get_area() + ID_operand->area.get_area() + - ID_misc->area.get_area()) * + (ID_inst.area.get_area() + ID_operand.area.get_area() + + ID_misc.area.get_area()) * coredynp.decodeW); } @@ -1258,12 +1266,12 @@ void InstFetchU::computeEnergy(bool is_tdp) { 0; // XML->sys.core[ithCore].BTB.write_accesses; } - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; + ID_inst.stats_t.readAc.access = coredynp.decodeW; + ID_operand.stats_t.readAc.access = coredynp.decodeW; + ID_misc.stats_t.readAc.access = coredynp.decodeW; + ID_inst.tdp_stats = ID_inst.stats_t; + ID_operand.tdp_stats = ID_operand.stats_t; + ID_misc.tdp_stats = ID_misc.stats_t; } else { // init stats for Runtime Dynamic (RTP) @@ -1304,20 +1312,20 @@ void InstFetchU::computeEnergy(bool is_tdp) { BTB->rtp_stats = BTB->stats_t; } - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = + ID_inst.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_operand.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; + ID_misc.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; + ID_inst.rtp_stats = ID_inst.stats_t; + ID_operand.rtp_stats = ID_operand.stats_t; + 
ID_misc.rtp_stats = ID_misc.stats_t; } icache.power_t.reset(); IB->power_t.reset(); - // ID_inst->power_t.reset(); - // ID_operand->power_t.reset(); - // ID_misc->power_t.reset(); + // ID_inst.power_t.reset(); + // ID_operand.power_t.reset(); + // ID_misc.power_t.reset(); if (coredynp.predictionW > 0) { BTB->power_t.reset(); } @@ -1380,15 +1388,15 @@ void InstFetchU::computeEnergy(bool is_tdp) { power = power + BTB->power + BPT->power; } - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; + ID_inst.power_t.readOp.dynamic = ID_inst.power.readOp.dynamic; + ID_operand.power_t.readOp.dynamic = ID_operand.power.readOp.dynamic; + ID_misc.power_t.readOp.dynamic = ID_misc.power.readOp.dynamic; - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; + ID_inst.power.readOp.dynamic *= ID_inst.tdp_stats.readAc.access; + ID_operand.power.readOp.dynamic *= ID_operand.tdp_stats.readAc.access; + ID_misc.power.readOp.dynamic *= ID_misc.tdp_stats.readAc.access; - power = power + (ID_inst->power + ID_operand->power + ID_misc->power); + power = power + (ID_inst.power + ID_operand.power + ID_misc.power); } else { // icache.rt_power = icache.power_t + // (icache.caches->local_result.power)*pppm_lkg + @@ -1409,15 +1417,15 @@ void InstFetchU::computeEnergy(bool is_tdp) { rt_power = rt_power + BTB->rt_power + BPT->rt_power; } - ID_inst->rt_power.readOp.dynamic = - ID_inst->power_t.readOp.dynamic * ID_inst->rtp_stats.readAc.access; - ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * - ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = - ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; + ID_inst.rt_power.readOp.dynamic 
= + ID_inst.power_t.readOp.dynamic * ID_inst.rtp_stats.readAc.access; + ID_operand.rt_power.readOp.dynamic = ID_operand.power_t.readOp.dynamic * + ID_operand.rtp_stats.readAc.access; + ID_misc.rt_power.readOp.dynamic = + ID_misc.power_t.readOp.dynamic * ID_misc.rtp_stats.readAc.access; rt_power = rt_power + - (ID_inst->rt_power + ID_operand->rt_power + ID_misc->rt_power); + (ID_inst.rt_power + ID_operand.rt_power + ID_misc.rt_power); } } @@ -1528,52 +1536,52 @@ void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; cout << indent_str << "Instruction Decoder:" << endl; cout << indent_str_next << "Area = " - << (ID_inst->area.get_area() + ID_operand->area.get_area() + - ID_misc->area.get_area()) * + << (ID_inst.area.get_area() + ID_operand.area.get_area() + + ID_misc.area.get_area()) * coredynp.decodeW * 1e-6 << " mm^2" << endl; cout << indent_str_next << "Peak Dynamic = " - << (ID_inst->power.readOp.dynamic + ID_operand->power.readOp.dynamic + - ID_misc->power.readOp.dynamic) * + << (ID_inst.power.readOp.dynamic + ID_operand.power.readOp.dynamic + + ID_misc.power.readOp.dynamic) * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? (ID_inst->power.readOp.longer_channel_leakage + - ID_operand->power.readOp.longer_channel_leakage + - ID_misc->power.readOp.longer_channel_leakage) - : (ID_inst->power.readOp.leakage + - ID_operand->power.readOp.leakage + - ID_misc->power.readOp.leakage)) + << (long_channel ? 
(ID_inst.power.readOp.longer_channel_leakage + + ID_operand.power.readOp.longer_channel_leakage + + ID_misc.power.readOp.longer_channel_leakage) + : (ID_inst.power.readOp.leakage + + ID_operand.power.readOp.leakage + + ID_misc.power.readOp.leakage)) << " W" << endl; double tot_leakage = - (ID_inst->power.readOp.leakage + ID_operand->power.readOp.leakage + - ID_misc->power.readOp.leakage); + (ID_inst.power.readOp.leakage + ID_operand.power.readOp.leakage + + ID_misc.power.readOp.leakage); double tot_leakage_longchannel = - (ID_inst->power.readOp.longer_channel_leakage + - ID_operand->power.readOp.longer_channel_leakage + - ID_misc->power.readOp.longer_channel_leakage); - double tot_leakage_pg = (ID_inst->power.readOp.power_gated_leakage + - ID_operand->power.readOp.power_gated_leakage + - ID_misc->power.readOp.power_gated_leakage); + (ID_inst.power.readOp.longer_channel_leakage + + ID_operand.power.readOp.longer_channel_leakage + + ID_misc.power.readOp.longer_channel_leakage); + double tot_leakage_pg = (ID_inst.power.readOp.power_gated_leakage + + ID_operand.power.readOp.power_gated_leakage + + ID_misc.power.readOp.power_gated_leakage); double tot_leakage_pg_with_long_channel = - (ID_inst->power.readOp.power_gated_with_long_channel_leakage + - ID_operand->power.readOp.power_gated_with_long_channel_leakage + - ID_misc->power.readOp.power_gated_with_long_channel_leakage); + (ID_inst.power.readOp.power_gated_with_long_channel_leakage + + ID_operand.power.readOp.power_gated_with_long_channel_leakage + + ID_misc.power.readOp.power_gated_with_long_channel_leakage); if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel ? 
tot_leakage_pg_with_long_channel : tot_leakage_pg) << " W" << endl; cout << indent_str_next << "Gate Leakage = " - << (ID_inst->power.readOp.gate_leakage + - ID_operand->power.readOp.gate_leakage + - ID_misc->power.readOp.gate_leakage) + << (ID_inst.power.readOp.gate_leakage + + ID_operand.power.readOp.gate_leakage + + ID_misc.power.readOp.gate_leakage) << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << (ID_inst->rt_power.readOp.dynamic + - ID_operand->rt_power.readOp.dynamic + - ID_misc->rt_power.readOp.dynamic) / + << (ID_inst.rt_power.readOp.dynamic + + ID_operand.rt_power.readOp.dynamic + + ID_misc.rt_power.readOp.dynamic) / executionTime << " W" << endl; cout << endl; @@ -1615,18 +1623,7 @@ InstFetchU ::~InstFetchU() { delete IB; IB = 0; } - if (ID_inst) { - delete ID_inst; - ID_inst = 0; - } - if (ID_operand) { - delete ID_operand; - ID_operand = 0; - } - if (ID_misc) { - delete ID_misc; - ID_misc = 0; - } + if (coredynp.predictionW > 0) { if (BTB) { delete BTB; diff --git a/src/core/instfetch.h b/src/core/instfetch.h index b1606ce..e805c91 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -132,9 +132,9 @@ class InstFetchU : public Component { ArrayST *IB; ArrayST *BTB; BranchPredictor *BPT; - inst_decoder *ID_inst; - inst_decoder *ID_operand; - inst_decoder *ID_misc; + inst_decoder ID_inst; + inst_decoder ID_operand; + inst_decoder ID_misc; bool exist; InstFetchU(const ParseXML *XML_interface, diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index f0f876d..bd4a7dc 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -31,16 +31,14 @@ #include "inst_decoder.h" -inst_decoder::inst_decoder(bool _is_default, +void inst_decoder::set_params(bool _is_default, const InputParameter *configure_interface, int opcode_length_, int num_decoders_, bool x86_, enum Device_ty device_ty_, enum Core_type core_ty_) - : is_default(_is_default), opcode_length(opcode_length_), - num_decoders(num_decoders_), 
x86(x86_), device_ty(device_ty_), - core_ty(core_ty_) { + { /* * Instruction decoder is different from n to 2^n decoders * that are commonly used in row decoders in memory arrays. @@ -64,6 +62,9 @@ inst_decoder::inst_decoder(bool _is_default, * it involve both decoding instructions into u-ops and * merge u-ops when doing micro-ops fusion. */ + is_default=_is_default; opcode_length=opcode_length_; + num_decoders=num_decoders_; x86=x86_; device_ty=device_ty_; + core_ty=core_ty_; bool is_dram = false; double pmos_to_nmos_sizing_r; double load_nmos_width, load_pmos_width; @@ -97,7 +98,7 @@ inst_decoder::inst_decoder(bool _is_default, false /*wl_tr*/, // to use peri device cell); final_dec.computeArea(); - PredecBlk predec_blk1; + predec_blk1.set_params(num_decoded_signals, &final_dec, 0, // Assuming predec and dec are back to back @@ -105,7 +106,7 @@ inst_decoder::inst_decoder(bool _is_default, 1, // Each Predec only drives one final dec false /*is_dram*/, true); - PredecBlk predec_blk2; + predec_blk2.set_params(num_decoded_signals, &final_dec, 0, // Assuming predec and dec are back to back @@ -114,14 +115,17 @@ inst_decoder::inst_decoder(bool _is_default, false /*is_dram*/, false); - PredecBlkDrv predec_blk_drv1; + predec_blk_drv1.set_params(0, &predec_blk1, false); - PredecBlkDrv predec_blk_drv2; + predec_blk_drv2.set_params(0, &predec_blk2, false); pre_dec.set_params(&predec_blk_drv1, &predec_blk_drv2); - double area_decoder = final_dec.area.get_area() * num_decoded_signals * +} + +void inst_decoder::computeArea(){ + double area_decoder = final_dec.area.get_area() * num_decoded_signals * num_decoder_segments * num_decoders; // double w_decoder = area_decoder / area.get_h(); double area_pre_dec = @@ -132,7 +136,9 @@ inst_decoder::inst_decoder(bool _is_default, double macro_layout_overhead = g_tp.macro_layout_overhead; double chip_PR_overhead = g_tp.chip_layout_overhead; area.set_area(area.get_area() * macro_layout_overhead * chip_PR_overhead); +} +void 
inst_decoder::computeDynamicPower(){ inst_decoder_delay_power(); double sckRation = g_tp.sckt_co_eff; diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index b38360b..2d4b7c3 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -20,14 +20,14 @@ class inst_decoder : public Component { public: - inst_decoder(bool _is_default, + void set_params(bool _is_default, const InputParameter *configure_interface, int opcode_length_, int num_decoders_, bool x86_, enum Device_ty device_ty_ = Core_device, enum Core_type core_ty_ = Inorder); - inst_decoder(); + inst_decoder(){}; bool is_default; int opcode_length; int num_decoders; @@ -41,11 +41,18 @@ class inst_decoder : public Component { Decoder final_dec; Predec pre_dec; - + PredecBlk predec_blk1; + PredecBlk predec_blk2; + PredecBlkDrv predec_blk_drv1; + PredecBlkDrv predec_blk_drv2; statsDef tdp_stats; statsDef rtp_stats; statsDef stats_t; powerDef power_t; + + void computeArea(); + void computeDynamicPower(); + void inst_decoder_delay_power(); ~inst_decoder(); void leakage_feedback(double temperature); From bd7c49951b9abbd3c87afc57a90469b91f6e30a8 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 01:24:23 -0500 Subject: [PATCH 42/59] NoC calculation incorrect by a factor of 1/2 for all niagra based tests --- src/array.h | 9 +-- src/cache/datacache.h | 2 +- src/cache/sharedcache.cc | 119 +++++++++++++++++++++++++++++++++--- src/cacti/cacti_interface.h | 54 ++++++++++++---- src/cacti/component.h | 4 +- src/cacti/router.h | 17 ++++++ src/main.cc | 29 ++++++--- src/noc.cc | 25 ++++---- src/noc.h | 3 +- src/options.cc | 3 +- src/options.h | 3 +- src/processor.cc | 43 +++++++------ unit_test/unit_test.py | 82 ++++++++++++++++++++++++- unit_test/unit_test.sh | 6 +- 14 files changed, 320 insertions(+), 79 deletions(-) diff --git a/src/array.h b/src/array.h index 20c14f3..fcc5e7c 100644 --- a/src/array.h +++ b/src/array.h @@ -90,14 +90,11 @@ class ArrayST : public Component { template 
void serialize(Archive &ar, const unsigned int version) { ar &name; - ar &device_ty; - ar &opt_local; - ar &core_ty; - ar &is_default; + ar &power_t; + ar &stats_t; ar &tdp_stats; ar &rtp_stats; - ar &stats_t; - ar &power_t; + ar &local_result; Component::serialize(ar, version); } }; diff --git a/src/cache/datacache.h b/src/cache/datacache.h index fb47270..2b2d99b 100644 --- a/src/cache/datacache.h +++ b/src/cache/datacache.h @@ -61,7 +61,7 @@ class DataCache : public InstCache { template void serialize(Archive &ar, const unsigned int version) { ar &wbb; - Component::serialize(ar, version); + InstCache::serialize(ar, version); } }; diff --git a/src/cache/sharedcache.cc b/src/cache/sharedcache.cc index d6b6517..7a41fbd 100644 --- a/src/cache/sharedcache.cc +++ b/src/cache/sharedcache.cc @@ -408,30 +408,71 @@ void SharedCache::computeArea() { area.set_area(area.get_area() + unicache.caches.local_result.area); interface_ip.force_cache_config = false; + if (unicache.caches.local_result.tag_array2 != nullptr) { + unicache.caches.local_result.ta2_power = + unicache.caches.local_result.tag_array2->power; + } + if (unicache.caches.local_result.data_array2 != nullptr) { + unicache.caches.local_result.da2_power = + unicache.caches.local_result.data_array2->power; + } + if (!((cachep.dir_ty == ST && cacheL == L1Directory) || (cachep.dir_ty == ST && cacheL == L2Directory))) { unicache.missb.computeArea(); unicache.area.set_area(unicache.area.get_area() + unicache.missb.local_result.area); area.set_area(area.get_area() + unicache.missb.local_result.area); + if (unicache.missb.local_result.tag_array2 != nullptr) { + unicache.missb.local_result.ta2_power = + unicache.missb.local_result.tag_array2->power; + } + if (unicache.missb.local_result.data_array2 != nullptr) { + unicache.missb.local_result.da2_power = + unicache.missb.local_result.data_array2->power; + } // Fill Buffer: unicache.ifb.computeArea(); unicache.area.set_area(unicache.area.get_area() + 
unicache.ifb.local_result.area); area.set_area(area.get_area() + unicache.ifb.local_result.area); + if (unicache.ifb.local_result.tag_array2 != nullptr) { + unicache.ifb.local_result.ta2_power = + unicache.ifb.local_result.tag_array2->power; + } + if (unicache.ifb.local_result.data_array2 != nullptr) { + unicache.ifb.local_result.da2_power = + unicache.ifb.local_result.data_array2->power; + } // Prefetch Buffer: unicache.prefetchb.computeArea(); unicache.area.set_area(unicache.area.get_area() + unicache.prefetchb.local_result.area); area.set_area(area.get_area() + unicache.prefetchb.local_result.area); + if (unicache.prefetchb.local_result.tag_array2 != nullptr) { + unicache.prefetchb.local_result.ta2_power = + unicache.prefetchb.local_result.tag_array2->power; + } + if (unicache.prefetchb.local_result.data_array2 != nullptr) { + unicache.prefetchb.local_result.da2_power = + unicache.prefetchb.local_result.data_array2->power; + } // WBB: unicache.wbb.computeArea(); unicache.area.set_area(unicache.area.get_area() + unicache.wbb.local_result.area); area.set_area(area.get_area() + unicache.wbb.local_result.area); + if (unicache.wbb.local_result.tag_array2 != nullptr) { + unicache.wbb.local_result.ta2_power = + unicache.wbb.local_result.tag_array2->power; + } + if (unicache.wbb.local_result.data_array2 != nullptr) { + unicache.wbb.local_result.da2_power = + unicache.wbb.local_result.data_array2->power; + } } set_area = true; } @@ -447,6 +488,10 @@ void SharedCache::computeStaticPower(bool is_tdp) { "computeStaticPower()\n"; exit(1); } + if (is_tdp) { + power.reset(); + rt_power.reset(); + } double homenode_data_access = (cachep.dir_ty == SBT) ? 
0.9 : 1.0; if (is_tdp) { if (!((cachep.dir_ty == ST && cacheL == L1Directory) || @@ -455,71 +500,106 @@ void SharedCache::computeStaticPower(bool is_tdp) { unicache.caches.stats_t.readAc.access = .67 * unicache.caches.l_ip.num_rw_ports * cachep.duty_cycle * homenode_data_access; + // std::cout << unicache.caches.stats_t.readAc.access << "\n"; unicache.caches.stats_t.readAc.miss = 0; + // std::cout << unicache.caches.stats_t.readAc.miss << "\n"; unicache.caches.stats_t.readAc.hit = unicache.caches.stats_t.readAc.access - unicache.caches.stats_t.readAc.miss; + // std::cout << unicache.caches.stats_t.readAc.hit << "\n"; unicache.caches.stats_t.writeAc.access = .33 * unicache.caches.l_ip.num_rw_ports * cachep.duty_cycle * homenode_data_access; + // std::cout << unicache.caches.stats_t.writeAc.access << "\n"; unicache.caches.stats_t.writeAc.miss = 0; + // std::cout << unicache.caches.stats_t.writeAc.miss << "\n"; unicache.caches.stats_t.writeAc.hit = unicache.caches.stats_t.writeAc.access - unicache.caches.stats_t.writeAc.miss; + // std::cout << unicache.caches.stats_t.writeAc.hit << "\n"; unicache.caches.tdp_stats = unicache.caches.stats_t; if (cachep.dir_ty == SBT) { homenode_stats_t.readAc.access = .67 * unicache.caches.l_ip.num_rw_ports * cachep.dir_duty_cycle * (1 - homenode_data_access); + // std::cout << homenode_stats_t.readAc.access << "\n"; homenode_stats_t.readAc.miss = 0; + // std::cout << homenode_stats_t.readAc.miss << "\n"; homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; + // std::cout << homenode_stats_t.readAc.hit << "\n"; homenode_stats_t.writeAc.access = .67 * unicache.caches.l_ip.num_rw_ports * cachep.dir_duty_cycle * (1 - homenode_data_access); + // std::cout << homenode_stats_t.writeAc.access << "\n"; homenode_stats_t.writeAc.miss = 0; + // std::cout << homenode_stats_t.writeAc.miss << "\n"; homenode_stats_t.writeAc.hit = homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss; + // std::cout << 
homenode_stats_t.writeAc.hit << "\n"; homenode_tdp_stats = homenode_stats_t; } unicache.missb.stats_t.readAc.access = unicache.missb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.missb.stats_t.readAc.access"; + // std::cout << unicache.missb.stats_t.readAc.access << "\n"; unicache.missb.stats_t.writeAc.access = unicache.missb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.missb.stats_t.writeAc.access"; + // std::cout << unicache.missb.stats_t.writeAc.access << "\n"; unicache.missb.tdp_stats = unicache.missb.stats_t; unicache.ifb.stats_t.readAc.access = unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.ifb.stats_t.readAc.access"; + // std::cout << unicache.ifb.stats_t.readAc.access << "\n"; unicache.ifb.stats_t.writeAc.access = unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.ifb.stats_t.writeAc.access"; + // std::cout << unicache.ifb.stats_t.writeAc.access << "\n"; unicache.ifb.tdp_stats = unicache.ifb.stats_t; unicache.prefetchb.stats_t.readAc.access = unicache.prefetchb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.prefetchb.stats_t.readAc.access"; + // std::cout << unicache.prefetchb.stats_t.readAc.access << "\n"; unicache.prefetchb.stats_t.writeAc.access = unicache.ifb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.prefetchb.stats_t.writeAc.access"; + // std::cout << unicache.prefetchb.stats_t.writeAc.access << "\n"; unicache.prefetchb.tdp_stats = unicache.prefetchb.stats_t; unicache.wbb.stats_t.readAc.access = unicache.wbb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.wbb.stats_t.readAc.access"; + // std::cout << unicache.wbb.stats_t.readAc.access << "\n"; unicache.wbb.stats_t.writeAc.access = unicache.wbb.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << "unicache.wbb.stats_t.writeAc.access"; + // std::cout << unicache.wbb.stats_t.writeAc.access << "\n"; 
unicache.wbb.tdp_stats = unicache.wbb.stats_t; } else { unicache.caches.stats_t.readAc.access = unicache.caches.l_ip.num_search_ports * cachep.duty_cycle; + // std::cout << unicache.caches.stats_t.readAc.access << "\n"; unicache.caches.stats_t.readAc.miss = 0; + // std::cout << unicache.caches.stats_t.readAc.miss << "\n"; unicache.caches.stats_t.readAc.hit = unicache.caches.stats_t.readAc.access - unicache.caches.stats_t.readAc.miss; + // std::cout << unicache.caches.stats_t.readAc.hit << "\n"; unicache.caches.stats_t.writeAc.access = 0; + // std::cout << unicache.caches.stats_t.writeAc.access << "\n"; unicache.caches.stats_t.writeAc.miss = 0; + // std::cout << unicache.caches.stats_t.writeAc.miss << "\n"; unicache.caches.stats_t.writeAc.hit = unicache.caches.stats_t.writeAc.access - unicache.caches.stats_t.writeAc.miss; + // std::cout << unicache.caches.stats_t.writeAc.hit << "\n"; unicache.caches.tdp_stats = unicache.caches.stats_t; + // std::cout << unicache.caches.stats_t.writeAc.hit << "\n"; } } else { @@ -675,32 +755,45 @@ void SharedCache::computeStaticPower(bool is_tdp) { (unicache.caches.stats_t.readAc.hit * unicache.caches.local_result.power.readOp.dynamic + unicache.caches.stats_t.readAc.miss * - unicache.caches.local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.ta2_power.readOp.dynamic + unicache.caches.stats_t.writeAc.miss * - unicache.caches.local_result.tag_array2->power.writeOp.dynamic + + unicache.caches.local_result.ta2_power.writeOp.dynamic + unicache.caches.stats_t.writeAc.access * unicache.caches.local_result.power.writeOp .dynamic); // write miss will also generate a write later + // std::cout << "unicache.caches.local_result.power.readOp.dynamic "; + // std::cout << unicache.caches.local_result.power.readOp.dynamic << "\n"; + // std::cout << "unicache.caches.local_result.ta2_power.readOp.dynamic "; + // std::cout << unicache.caches.local_result.ta2_power.readOp.dynamic << + // "\n"; std::cout << + // 
"unicache.caches.local_result.ta2_power.writeOp.dynamic + // "; std::cout << unicache.caches.local_result.ta2_power.writeOp.dynamic << + // "\n"; std::cout << "unicache.caches.local_result.power.writeOp.dynamic "; + // std::cout << unicache.caches.local_result.power.writeOp.dynamic << "\n"; + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; if (cachep.dir_ty == SBT) { unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * - (unicache.caches.local_result.data_array2->power.readOp.dynamic * + (unicache.caches.local_result.da2_power.readOp.dynamic * dir_overhead + - unicache.caches.local_result.tag_array2->power.readOp.dynamic) + + unicache.caches.local_result.ta2_power.readOp.dynamic) + homenode_stats_t.readAc.miss * - unicache.caches.local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.ta2_power.readOp.dynamic + homenode_stats_t.writeAc.miss * - unicache.caches.local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.ta2_power.readOp.dynamic + homenode_stats_t.writeAc.hit * - (unicache.caches.local_result.data_array2->power.writeOp.dynamic * + (unicache.caches.local_result.da2_power.writeOp.dynamic * dir_overhead + - unicache.caches.local_result.tag_array2->power.readOp.dynamic + + unicache.caches.local_result.ta2_power.readOp.dynamic + homenode_stats_t.writeAc.miss * unicache.caches.local_result.power.writeOp .dynamic); // write miss on dynamic home node will // generate a replacement write on whole cache // block + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; } unicache.power_t.readOp.dynamic += @@ -709,27 +802,37 @@ void SharedCache::computeStaticPower(bool is_tdp) { unicache.missb.stats_t.writeAc.access * unicache.missb.local_result.power.writeOp .dynamic; // each access to missb involves a CAM and a write + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << 
unicache.power_t.readOp.dynamic << "\n"; unicache.power_t.readOp.dynamic += unicache.ifb.stats_t.readAc.access * unicache.ifb.local_result.power.searchOp.dynamic + unicache.ifb.stats_t.writeAc.access * unicache.ifb.local_result.power.writeOp.dynamic; + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; unicache.power_t.readOp.dynamic += unicache.prefetchb.stats_t.readAc.access * unicache.prefetchb.local_result.power.searchOp.dynamic + unicache.prefetchb.stats_t.writeAc.access * unicache.prefetchb.local_result.power.writeOp.dynamic; + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; unicache.power_t.readOp.dynamic += unicache.wbb.stats_t.readAc.access * unicache.wbb.local_result.power.searchOp.dynamic + unicache.wbb.stats_t.writeAc.access * unicache.wbb.local_result.power.writeOp.dynamic; + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; } else { unicache.power_t.readOp.dynamic += (unicache.caches.stats_t.readAc.access * unicache.caches.local_result.power.searchOp.dynamic + unicache.caches.stats_t.writeAc.access * unicache.caches.local_result.power.writeOp.dynamic); + // std::cout << "unicache.power_t.readOp.dynamic "; + // std::cout << unicache.power_t.readOp.dynamic << "\n"; } if (is_tdp) { diff --git a/src/cacti/cacti_interface.h b/src/cacti/cacti_interface.h index fa9f15e..64cefb3 100644 --- a/src/cacti/cacti_interface.h +++ b/src/cacti/cacti_interface.h @@ -418,6 +418,8 @@ class uca_org_t { public: mem_array *tag_array2; mem_array *data_array2; + powerDef ta2_power; // Hack for serialization + powerDef da2_power; // Hack for serialization double access_time; double cycle_time; double area; @@ -444,6 +446,17 @@ class uca_org_t { void adjust_area(); // for McPAT only to adjust routing overhead void cleanup(); ~uca_org_t(); + + // Serialization + friend class 
boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &power; + ar &area; + ar &ta2_power; + ar &da2_power; + } }; void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); @@ -636,12 +649,20 @@ class mem_array { double subarray_length; double subarray_height; - double delay_route_to_bank, delay_input_htree, - delay_row_predecode_driver_and_block, delay_row_decoder, delay_bitlines, - delay_sense_amp, delay_subarray_output_driver, delay_dout_htree, - delay_comparator, delay_matchlines; + double delay_route_to_bank; + double delay_input_htree; + double delay_row_predecode_driver_and_block; + double delay_row_decoder; + double delay_bitlines; + double delay_sense_amp; + double delay_subarray_output_driver; + double delay_dout_htree; + double delay_comparator; + double delay_matchlines; - double all_banks_height, all_banks_width, area_efficiency; + double all_banks_height; + double all_banks_width; + double area_efficiency; powerDef power_routing_to_bank; powerDef power_addr_input_htree; @@ -679,9 +700,14 @@ class mem_array { enum Wire_type wt; // dram stats - double activate_energy, read_energy, write_energy, precharge_energy, - refresh_power, leak_power_subbank_closed_page, - leak_power_subbank_open_page, leak_power_request_and_reply_networks; + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; + double refresh_power; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; double precharge_delay; @@ -692,10 +718,14 @@ class mem_array { double sram_sleep_tx_width, wl_sleep_tx_width, cl_sleep_tx_width; double sram_sleep_tx_area, wl_sleep_tx_area, cl_sleep_tx_area; - double sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, - cl_sleep_wakeup_latency, bl_floating_wakeup_latency; - double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, - cl_sleep_wakeup_energy, bl_floating_wakeup_energy; 
+ double sram_sleep_wakeup_latency; + double wl_sleep_wakeup_latency; + double cl_sleep_wakeup_latency; + double bl_floating_wakeup_latency; + double sram_sleep_wakeup_energy; + double wl_sleep_wakeup_energy; + double cl_sleep_wakeup_energy; + double bl_floating_wakeup_energy; int num_active_mats; int num_submarray_mats; diff --git a/src/cacti/component.h b/src/cacti/component.h index 9f36351..9a8eb46 100644 --- a/src/cacti/component.h +++ b/src/cacti/component.h @@ -82,10 +82,10 @@ class Component { template void serialize(Archive &ar, const unsigned int version) { - ar &delay; - ar &cycle_time; ar &power; ar &rt_power; + ar &delay; + ar &cycle_time; ar &area; } diff --git a/src/cacti/router.h b/src/cacti/router.h index fb6f1fa..8a778f6 100644 --- a/src/cacti/router.h +++ b/src/cacti/router.h @@ -42,6 +42,10 @@ #include "wire.h" #include +#include +#include +#include +#include #include class Router : public Component { @@ -112,6 +116,19 @@ class Router : public Component { void get_router_delay(); double min_w_pmos; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &arbiter; + ar &crossbar; + ar &buffer; + ar &Component::power; + ar &Component::area; + // Component::serialize(ar, version); + } }; #endif diff --git a/src/main.cc b/src/main.cc index 8ea5b27..9e5e6f5 100644 --- a/src/main.cc +++ b/src/main.cc @@ -50,9 +50,15 @@ void save(const Processor &s, std::string name) { void restore(Processor &s, std::string name) { // Restore from the Archive + // std::cerr << "Archive " << name << "\n"; std::ifstream ifs(name.c_str()); - boost::archive::text_iarchive ia(ifs); - ia >> s; + if (ifs.good()) { + boost::archive::text_iarchive ia(ifs); + ia >> s; + } else { + std::cerr << "Archive " << name << " cannot be used\n"; + assert(false); + } } using namespace std; @@ -71,14 +77,19 @@ int main(int argc, char *argv[]) { // parse XML-based interface ParseXML *p1 = new ParseXML(); 
Processor proc; - Processor proc2; p1->parse(opt.input_xml); - proc.init(p1); - save(proc, opt.serialization_name); - restore(proc2, opt.serialization_name); - proc2.init(p1, true); - proc2.displayEnergy(2, opt.print_level); - // proc.displayEnergy(2, opt.print_level); + if (opt.serialization_create) { + proc.init(p1); + save(proc, opt.serialization_file); + std::cout << "Checkpoint generated @: " << opt.serialization_file << "\n"; + return 0; + } else if (opt.serialization_restore) { + restore(proc, opt.serialization_file); + proc.init(p1, true); + } else { + proc.init(p1); + } + proc.displayEnergy(2, opt.print_level); delete p1; return 0; } diff --git a/src/noc.cc b/src/noc.cc index 7ff180e..982263a 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -213,36 +213,39 @@ void NoC::set_stats(const ParseXML *XML) { init_stats = true; } -void NoC::computePower() { +void NoC::computePower(bool cp) { double pppm_t[4] = {1, 1, 1, 1}; double M = nocdynp.duty_cycle; // init stats for TDP stats_t.readAc.access = M; tdp_stats = stats_t; if (router_exist) { - set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern - router.power = router.power * pppm_t; - set_pppm(pppm_t, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); + if (!cp) { + set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern + router.power = router.power * pppm_t; + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); + } power = power + router.power * pppm_t; } if (link_bus_exist) { - if (nocdynp.type) + if (nocdynp.type) { set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports - 1), nocdynp.global_linked_ports, nocdynp.global_linked_ports, nocdynp.global_linked_ports); - // reset traffic pattern; local port do not have router links - else + // reset traffic pattern; local port do not have router links + } else { set_pppm(pppm_t, 1 * M_traffic_pattern * M * (nocdynp.min_ports), 
nocdynp.global_linked_ports, nocdynp.global_linked_ports, nocdynp.global_linked_ports); // reset traffic pattern + } link_bus_tot_per_Router.power = link_bus.power * pppm_t; diff --git a/src/noc.h b/src/noc.h index 17e4ad5..9445ff7 100644 --- a/src/noc.h +++ b/src/noc.h @@ -74,7 +74,7 @@ class NoC : public Component { double link_len_ = 0); void set_stats(const ParseXML *XML); void computeArea(); - void computePower(); + void computePower(bool cp = false); void computeRuntimeDynamicPower(); void init_link_bus(double link_len_); void display(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); @@ -110,6 +110,7 @@ class NoC : public Component { ar &router_exist; ar &link_bus_exist; ar &link_bus_tot_per_Router; + // ar &link_bus_tot_per_Router.area; ar &Component::area; // Component::serialize(ar, version); } diff --git a/src/options.cc b/src/options.cc index 6d30dde..dd5b741 100644 --- a/src/options.cc +++ b/src/options.cc @@ -52,8 +52,7 @@ bool mcpat::Options::parse(int argc, char **argv) { po::options_description serialization("Serialization Options"); serialization.add_options() - ("serial_path", po::value(&serialization_path), "Path/to/serialization") - ("serial_name", po::value(&serialization_name)->default_value("mcpat_cp.txt"), "file name to serialize to") + ("serial_file", po::value(&serialization_file)->default_value("mcpat_cp.txt"), "file name to serialize to") ("serial_create", po::value(&serialization_create)->default_value(false), "Create A Serialization Checkpoint") ("serial_restore", po::value(&serialization_restore)->default_value(false), "Restore from a Serialization Checkpoint") ; diff --git a/src/options.h b/src/options.h index 55e903a..30421f9 100644 --- a/src/options.h +++ b/src/options.h @@ -53,8 +53,7 @@ class Options { bool opt_for_clk = true; // Serialization Options - std::string serialization_path = ""; - std::string serialization_name = ""; + std::string serialization_file = ""; bool serialization_create = false; bool 
serialization_restore = false; diff --git a/src/processor.cc b/src/processor.cc index 461ec0d..808ccba 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -513,31 +513,34 @@ void Processor::init(const ParseXML *XML, bool cp) { * at the end (even after the NOC router part) since the total chip area * must be obtain to decide the link routing */ - for (i = 0; i < numNOC; i++) { - if (nocs[i].nocdynp.has_global_link && XML->sys.NoC[i].type) { - nocs[i].init_link_bus( - sqrt(area.get_area() * - XML->sys.NoC[i].chip_coverage)); // compute global links - if (procdynp.homoNOC) { - noc.area.set_area(noc.area.get_area() + - nocs[i].link_bus_tot_per_Router.area.get_area() * - nocs[i].nocdynp.total_nodes * procdynp.numNOC); - area.set_area(area.get_area() + - nocs[i].link_bus_tot_per_Router.area.get_area() * - nocs[i].nocdynp.total_nodes * procdynp.numNOC); - } else { - noc.area.set_area(noc.area.get_area() + - nocs[i].link_bus_tot_per_Router.area.get_area() * - nocs[i].nocdynp.total_nodes); - area.set_area(area.get_area() + - nocs[i].link_bus_tot_per_Router.area.get_area() * - nocs[i].nocdynp.total_nodes); + if (!cp) { + for (i = 0; i < numNOC; i++) { + if (nocs[i].nocdynp.has_global_link && XML->sys.NoC[i].type) { + nocs[i].init_link_bus( + sqrt(area.get_area() * + XML->sys.NoC[i].chip_coverage)); // compute global links + if (procdynp.homoNOC) { + noc.area.set_area(noc.area.get_area() + + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes * + procdynp.numNOC); + area.set_area(area.get_area() + + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes * procdynp.numNOC); + } else { + noc.area.set_area(noc.area.get_area() + + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes); + area.set_area(area.get_area() + + nocs[i].link_bus_tot_per_Router.area.get_area() * + nocs[i].nocdynp.total_nodes); + } } } } // Compute energy of NoC (w or w/o links) or buses for (i = 0; i < numNOC; i++) { - 
nocs[i].computePower(); + nocs[i].computePower(cp); nocs[i].computeRuntimeDynamicPower(); if (procdynp.homoNOC) { set_pppm(pppm_t, diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index 1a4b5a2..1e3e339 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -43,6 +43,12 @@ output_path = "./output" golden_path = "./golden" +#parser = argparse.ArgumentParser() +#parser.add_argument('--input', type=str, default="", help="input path") +#parser.add_argument('--warmup', type=int, default=100000, help="time in nanoseconds of the warmup") +#parser.add_argument('--end', type=int, default=0, help="time in nanoseconds of end of the plot") +#args = parser.parse_args() + def print_info(info, *args): if verbose: @@ -99,7 +105,7 @@ def diff_result(vector): return 1 -def run_test(vector): +def run_test_normal(vector): global kill_flag kill_flag = False infile = os.path.join(input_path, vector + ".xml") @@ -134,6 +140,68 @@ def run_test(vector): return 0 +def run_test_serializaiton_create(vector): + global kill_flag + kill_flag = False + infile = os.path.join(input_path, vector + ".xml") + sname = os.path.join(output_path, vector + ".txt") + stdo = os.path.join(output_path, vector + ".out") + stde = os.path.join(output_path, vector + ".err") + with open(stdo, "w") as so, open(stde, "w") as se: + p = subprocess.Popen([ + "../build/mcpat", "-i", infile, "-p", "5", "--serial_create=true", + "--serial_file=" + sname + ], + stdout=so, + stderr=se) + t = Timer(timeout_limit, kill, [p]) + t.start() + p.wait() + t.cancel() + if kill_flag: + print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") + return 1 + if os.stat(os.path.join(output_path, vector + ".err")).st_size == 0: + print_pass(vector) + return 0 + else: + return 1 + return 0 + + +def run_test_serialization_restore(vector): + global kill_flag + kill_flag = False + infile = os.path.join(input_path, vector + ".xml") + sname = os.path.join(output_path, vector + ".txt") + stdo = 
os.path.join(output_path, vector + ".out") + stde = os.path.join(output_path, vector + ".err") + with open(stdo, "w") as so, open(stde, "w") as se: + p = subprocess.Popen([ + "../build/mcpat", "-i", infile, "-p", "5", "--serial_restore=true", + "--serial_file=" + sname + ], + stdout=so, + stderr=se) + t = Timer(timeout_limit, kill, [p]) + t.start() + p.wait() + t.cancel() + if kill_flag: + print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") + return 1 + else: + if diff_result(vector) == 0: + print_pass(vector) + return 0 + else: + print_fail( + vector, + "The files " + vector + ".out and " + vector + ".golden differ") + return 1 + return 0 + + def get_vectors(): files = glob.glob(os.path.join(input_path, "*")) vectors = sorted([os.path.basename(f).split(".")[0] for f in files]) @@ -146,8 +214,18 @@ def get_vectors(): print_info(start) vectors = get_vectors() print_info("Found " + str(len(vectors)) + " test vectors") + #for vector in vectors: + # if run_test_normal(vector) == 0: + # p += 1 + # else: + # f += 1 + #for vector in vectors: + # if run_test_serializaiton_create(vector) == 0: + # p += 1 + # else: + # f += 1 for vector in vectors: - if run_test(vector) == 0: + if run_test_serialization_restore(vector) == 0: p += 1 else: f += 1 diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 708510f..0b99e79 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -39,9 +39,9 @@ OUTPUT="./output" if [ ! 
-d $OUTPUT ]; then print_info "Creating $OUTPUT" mkdir -p $OUTPUT -else - print_info "Cleaning $OUTPUT" - rm -f $OUTPUT/* +#else + #print_info "Cleaning $OUTPUT" + #rm -f $OUTPUT/* fi #-------------------------------------------------------------------- From 53a8a4e98053b2fbde5e0ff2c1248772ff9946df Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 12:24:46 -0500 Subject: [PATCH 43/59] Unit Test Update & Merge & bugfix --- src/logic/dep_resource_conflict_check.cc | 8 +- src/noc.cc | 10 +- .../{ => basic_test_1}/ARM_A9_2GHz.golden | 0 .../ARM_A9_2GHz_withIOC.golden | 0 .../{ => basic_test_1}/Alpha21364.golden | 0 .../golden/{ => basic_test_1}/Niagara1.golden | 0 .../Niagara1_sharing_DC.golden | 0 .../Niagara1_sharing_SBT.golden | 0 .../Niagara1_sharing_ST.golden | 0 .../golden/{ => basic_test_1}/Niagara2.golden | 0 .../golden/{ => basic_test_1}/Penryn.golden | 0 .../golden/{ => basic_test_1}/Xeon.golden | 0 .../input/{ => basic_test_1}/ARM_A9_2GHz.xml | 0 .../ARM_A9_2GHz_withIOC.xml | 0 .../input/{ => basic_test_1}/Alpha21364.xml | 0 .../input/{ => basic_test_1}/Niagara1.xml | 0 .../Niagara1_sharing_DC.xml | 0 .../Niagara1_sharing_SBT.xml | 0 .../Niagara1_sharing_ST.xml | 0 .../input/{ => basic_test_1}/Niagara2.xml | 0 unit_test/input/{ => basic_test_1}/Penryn.xml | 0 unit_test/input/{ => basic_test_1}/Xeon.xml | 0 unit_test/input/serialization_test_1/mp_1.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_10.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_11.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_12.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_13.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_14.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_15.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_16.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_17.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_18.xml | 533 
++++++++++++++++++ .../input/serialization_test_1/mp_19.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_2.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_20.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_21.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_22.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_23.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_24.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_25.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_26.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_27.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_28.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_29.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_3.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_30.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_31.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_32.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_33.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_34.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_35.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_36.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_37.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_38.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_39.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_4.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_40.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_41.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_42.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_43.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_44.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_45.xml | 533 
++++++++++++++++++ .../input/serialization_test_1/mp_46.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_47.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_48.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_49.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_5.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_50.xml | 533 ++++++++++++++++++ .../input/serialization_test_1/mp_51.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_6.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_7.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_8.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_1/mp_9.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_1.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_10.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_11.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_12.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_13.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_14.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_15.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_16.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_17.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_18.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_19.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_2.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_20.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_21.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_22.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_23.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_24.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_25.xml | 533 ++++++++++++++++++ 
.../input/serialization_test_2/mp_26.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_27.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_28.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_29.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_3.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_30.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_31.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_32.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_33.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_34.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_35.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_36.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_37.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_38.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_39.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_4.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_40.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_41.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_42.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_43.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_44.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_45.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_46.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_47.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_48.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_49.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_5.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_50.xml | 533 ++++++++++++++++++ .../input/serialization_test_2/mp_51.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_6.xml | 533 ++++++++++++++++++ 
unit_test/input/serialization_test_2/mp_7.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_8.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_2/mp_9.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_1.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_10.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_11.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_12.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_13.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_14.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_15.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_16.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_17.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_18.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_19.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_2.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_20.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_21.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_22.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_23.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_24.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_25.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_26.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_27.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_28.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_29.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_3.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_30.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_31.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_32.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_33.xml | 533 
++++++++++++++++++ .../input/serialization_test_3/mp_34.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_35.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_36.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_37.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_38.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_39.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_4.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_40.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_41.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_42.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_43.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_44.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_45.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_46.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_47.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_48.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_49.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_5.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_50.xml | 533 ++++++++++++++++++ .../input/serialization_test_3/mp_51.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_6.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_7.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_8.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_3/mp_9.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_1.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_10.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_11.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_12.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_13.xml | 533 ++++++++++++++++++ 
.../input/serialization_test_4/mp_14.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_15.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_16.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_17.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_18.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_19.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_2.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_20.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_21.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_22.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_23.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_24.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_25.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_26.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_27.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_28.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_29.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_3.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_30.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_31.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_32.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_33.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_34.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_35.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_36.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_37.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_38.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_39.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_4.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_40.xml | 533 ++++++++++++++++++ 
.../input/serialization_test_4/mp_41.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_42.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_43.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_44.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_45.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_46.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_47.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_48.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_49.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_5.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_50.xml | 533 ++++++++++++++++++ .../input/serialization_test_4/mp_51.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_6.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_7.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_8.xml | 533 ++++++++++++++++++ unit_test/input/serialization_test_4/mp_9.xml | 533 ++++++++++++++++++ unit_test/unit_test.py | 57 +- unit_test/unit_test.sh | 18 +- 228 files changed, 108789 insertions(+), 36 deletions(-) rename unit_test/golden/{ => basic_test_1}/ARM_A9_2GHz.golden (100%) rename unit_test/golden/{ => basic_test_1}/ARM_A9_2GHz_withIOC.golden (100%) rename unit_test/golden/{ => basic_test_1}/Alpha21364.golden (100%) rename unit_test/golden/{ => basic_test_1}/Niagara1.golden (100%) rename unit_test/golden/{ => basic_test_1}/Niagara1_sharing_DC.golden (100%) rename unit_test/golden/{ => basic_test_1}/Niagara1_sharing_SBT.golden (100%) rename unit_test/golden/{ => basic_test_1}/Niagara1_sharing_ST.golden (100%) rename unit_test/golden/{ => basic_test_1}/Niagara2.golden (100%) rename unit_test/golden/{ => basic_test_1}/Penryn.golden (100%) rename unit_test/golden/{ => basic_test_1}/Xeon.golden (100%) rename unit_test/input/{ => basic_test_1}/ARM_A9_2GHz.xml (100%) rename unit_test/input/{ => 
basic_test_1}/ARM_A9_2GHz_withIOC.xml (100%) rename unit_test/input/{ => basic_test_1}/Alpha21364.xml (100%) rename unit_test/input/{ => basic_test_1}/Niagara1.xml (100%) rename unit_test/input/{ => basic_test_1}/Niagara1_sharing_DC.xml (100%) rename unit_test/input/{ => basic_test_1}/Niagara1_sharing_SBT.xml (100%) rename unit_test/input/{ => basic_test_1}/Niagara1_sharing_ST.xml (100%) rename unit_test/input/{ => basic_test_1}/Niagara2.xml (100%) rename unit_test/input/{ => basic_test_1}/Penryn.xml (100%) rename unit_test/input/{ => basic_test_1}/Xeon.xml (100%) create mode 100644 unit_test/input/serialization_test_1/mp_1.xml create mode 100644 unit_test/input/serialization_test_1/mp_10.xml create mode 100644 unit_test/input/serialization_test_1/mp_11.xml create mode 100644 unit_test/input/serialization_test_1/mp_12.xml create mode 100644 unit_test/input/serialization_test_1/mp_13.xml create mode 100644 unit_test/input/serialization_test_1/mp_14.xml create mode 100644 unit_test/input/serialization_test_1/mp_15.xml create mode 100644 unit_test/input/serialization_test_1/mp_16.xml create mode 100644 unit_test/input/serialization_test_1/mp_17.xml create mode 100644 unit_test/input/serialization_test_1/mp_18.xml create mode 100644 unit_test/input/serialization_test_1/mp_19.xml create mode 100644 unit_test/input/serialization_test_1/mp_2.xml create mode 100644 unit_test/input/serialization_test_1/mp_20.xml create mode 100644 unit_test/input/serialization_test_1/mp_21.xml create mode 100644 unit_test/input/serialization_test_1/mp_22.xml create mode 100644 unit_test/input/serialization_test_1/mp_23.xml create mode 100644 unit_test/input/serialization_test_1/mp_24.xml create mode 100644 unit_test/input/serialization_test_1/mp_25.xml create mode 100644 unit_test/input/serialization_test_1/mp_26.xml create mode 100644 unit_test/input/serialization_test_1/mp_27.xml create mode 100644 unit_test/input/serialization_test_1/mp_28.xml create mode 100644 
unit_test/input/serialization_test_1/mp_29.xml create mode 100644 unit_test/input/serialization_test_1/mp_3.xml create mode 100644 unit_test/input/serialization_test_1/mp_30.xml create mode 100644 unit_test/input/serialization_test_1/mp_31.xml create mode 100644 unit_test/input/serialization_test_1/mp_32.xml create mode 100644 unit_test/input/serialization_test_1/mp_33.xml create mode 100644 unit_test/input/serialization_test_1/mp_34.xml create mode 100644 unit_test/input/serialization_test_1/mp_35.xml create mode 100644 unit_test/input/serialization_test_1/mp_36.xml create mode 100644 unit_test/input/serialization_test_1/mp_37.xml create mode 100644 unit_test/input/serialization_test_1/mp_38.xml create mode 100644 unit_test/input/serialization_test_1/mp_39.xml create mode 100644 unit_test/input/serialization_test_1/mp_4.xml create mode 100644 unit_test/input/serialization_test_1/mp_40.xml create mode 100644 unit_test/input/serialization_test_1/mp_41.xml create mode 100644 unit_test/input/serialization_test_1/mp_42.xml create mode 100644 unit_test/input/serialization_test_1/mp_43.xml create mode 100644 unit_test/input/serialization_test_1/mp_44.xml create mode 100644 unit_test/input/serialization_test_1/mp_45.xml create mode 100644 unit_test/input/serialization_test_1/mp_46.xml create mode 100644 unit_test/input/serialization_test_1/mp_47.xml create mode 100644 unit_test/input/serialization_test_1/mp_48.xml create mode 100644 unit_test/input/serialization_test_1/mp_49.xml create mode 100644 unit_test/input/serialization_test_1/mp_5.xml create mode 100644 unit_test/input/serialization_test_1/mp_50.xml create mode 100644 unit_test/input/serialization_test_1/mp_51.xml create mode 100644 unit_test/input/serialization_test_1/mp_6.xml create mode 100644 unit_test/input/serialization_test_1/mp_7.xml create mode 100644 unit_test/input/serialization_test_1/mp_8.xml create mode 100644 unit_test/input/serialization_test_1/mp_9.xml create mode 100644 
unit_test/input/serialization_test_2/mp_1.xml create mode 100644 unit_test/input/serialization_test_2/mp_10.xml create mode 100644 unit_test/input/serialization_test_2/mp_11.xml create mode 100644 unit_test/input/serialization_test_2/mp_12.xml create mode 100644 unit_test/input/serialization_test_2/mp_13.xml create mode 100644 unit_test/input/serialization_test_2/mp_14.xml create mode 100644 unit_test/input/serialization_test_2/mp_15.xml create mode 100644 unit_test/input/serialization_test_2/mp_16.xml create mode 100644 unit_test/input/serialization_test_2/mp_17.xml create mode 100644 unit_test/input/serialization_test_2/mp_18.xml create mode 100644 unit_test/input/serialization_test_2/mp_19.xml create mode 100644 unit_test/input/serialization_test_2/mp_2.xml create mode 100644 unit_test/input/serialization_test_2/mp_20.xml create mode 100644 unit_test/input/serialization_test_2/mp_21.xml create mode 100644 unit_test/input/serialization_test_2/mp_22.xml create mode 100644 unit_test/input/serialization_test_2/mp_23.xml create mode 100644 unit_test/input/serialization_test_2/mp_24.xml create mode 100644 unit_test/input/serialization_test_2/mp_25.xml create mode 100644 unit_test/input/serialization_test_2/mp_26.xml create mode 100644 unit_test/input/serialization_test_2/mp_27.xml create mode 100644 unit_test/input/serialization_test_2/mp_28.xml create mode 100644 unit_test/input/serialization_test_2/mp_29.xml create mode 100644 unit_test/input/serialization_test_2/mp_3.xml create mode 100644 unit_test/input/serialization_test_2/mp_30.xml create mode 100644 unit_test/input/serialization_test_2/mp_31.xml create mode 100644 unit_test/input/serialization_test_2/mp_32.xml create mode 100644 unit_test/input/serialization_test_2/mp_33.xml create mode 100644 unit_test/input/serialization_test_2/mp_34.xml create mode 100644 unit_test/input/serialization_test_2/mp_35.xml create mode 100644 unit_test/input/serialization_test_2/mp_36.xml create mode 100644 
unit_test/input/serialization_test_2/mp_37.xml create mode 100644 unit_test/input/serialization_test_2/mp_38.xml create mode 100644 unit_test/input/serialization_test_2/mp_39.xml create mode 100644 unit_test/input/serialization_test_2/mp_4.xml create mode 100644 unit_test/input/serialization_test_2/mp_40.xml create mode 100644 unit_test/input/serialization_test_2/mp_41.xml create mode 100644 unit_test/input/serialization_test_2/mp_42.xml create mode 100644 unit_test/input/serialization_test_2/mp_43.xml create mode 100644 unit_test/input/serialization_test_2/mp_44.xml create mode 100644 unit_test/input/serialization_test_2/mp_45.xml create mode 100644 unit_test/input/serialization_test_2/mp_46.xml create mode 100644 unit_test/input/serialization_test_2/mp_47.xml create mode 100644 unit_test/input/serialization_test_2/mp_48.xml create mode 100644 unit_test/input/serialization_test_2/mp_49.xml create mode 100644 unit_test/input/serialization_test_2/mp_5.xml create mode 100644 unit_test/input/serialization_test_2/mp_50.xml create mode 100644 unit_test/input/serialization_test_2/mp_51.xml create mode 100644 unit_test/input/serialization_test_2/mp_6.xml create mode 100644 unit_test/input/serialization_test_2/mp_7.xml create mode 100644 unit_test/input/serialization_test_2/mp_8.xml create mode 100644 unit_test/input/serialization_test_2/mp_9.xml create mode 100644 unit_test/input/serialization_test_3/mp_1.xml create mode 100644 unit_test/input/serialization_test_3/mp_10.xml create mode 100644 unit_test/input/serialization_test_3/mp_11.xml create mode 100644 unit_test/input/serialization_test_3/mp_12.xml create mode 100644 unit_test/input/serialization_test_3/mp_13.xml create mode 100644 unit_test/input/serialization_test_3/mp_14.xml create mode 100644 unit_test/input/serialization_test_3/mp_15.xml create mode 100644 unit_test/input/serialization_test_3/mp_16.xml create mode 100644 unit_test/input/serialization_test_3/mp_17.xml create mode 100644 
unit_test/input/serialization_test_3/mp_18.xml create mode 100644 unit_test/input/serialization_test_3/mp_19.xml create mode 100644 unit_test/input/serialization_test_3/mp_2.xml create mode 100644 unit_test/input/serialization_test_3/mp_20.xml create mode 100644 unit_test/input/serialization_test_3/mp_21.xml create mode 100644 unit_test/input/serialization_test_3/mp_22.xml create mode 100644 unit_test/input/serialization_test_3/mp_23.xml create mode 100644 unit_test/input/serialization_test_3/mp_24.xml create mode 100644 unit_test/input/serialization_test_3/mp_25.xml create mode 100644 unit_test/input/serialization_test_3/mp_26.xml create mode 100644 unit_test/input/serialization_test_3/mp_27.xml create mode 100644 unit_test/input/serialization_test_3/mp_28.xml create mode 100644 unit_test/input/serialization_test_3/mp_29.xml create mode 100644 unit_test/input/serialization_test_3/mp_3.xml create mode 100644 unit_test/input/serialization_test_3/mp_30.xml create mode 100644 unit_test/input/serialization_test_3/mp_31.xml create mode 100644 unit_test/input/serialization_test_3/mp_32.xml create mode 100644 unit_test/input/serialization_test_3/mp_33.xml create mode 100644 unit_test/input/serialization_test_3/mp_34.xml create mode 100644 unit_test/input/serialization_test_3/mp_35.xml create mode 100644 unit_test/input/serialization_test_3/mp_36.xml create mode 100644 unit_test/input/serialization_test_3/mp_37.xml create mode 100644 unit_test/input/serialization_test_3/mp_38.xml create mode 100644 unit_test/input/serialization_test_3/mp_39.xml create mode 100644 unit_test/input/serialization_test_3/mp_4.xml create mode 100644 unit_test/input/serialization_test_3/mp_40.xml create mode 100644 unit_test/input/serialization_test_3/mp_41.xml create mode 100644 unit_test/input/serialization_test_3/mp_42.xml create mode 100644 unit_test/input/serialization_test_3/mp_43.xml create mode 100644 unit_test/input/serialization_test_3/mp_44.xml create mode 100644 
unit_test/input/serialization_test_3/mp_45.xml create mode 100644 unit_test/input/serialization_test_3/mp_46.xml create mode 100644 unit_test/input/serialization_test_3/mp_47.xml create mode 100644 unit_test/input/serialization_test_3/mp_48.xml create mode 100644 unit_test/input/serialization_test_3/mp_49.xml create mode 100644 unit_test/input/serialization_test_3/mp_5.xml create mode 100644 unit_test/input/serialization_test_3/mp_50.xml create mode 100644 unit_test/input/serialization_test_3/mp_51.xml create mode 100644 unit_test/input/serialization_test_3/mp_6.xml create mode 100644 unit_test/input/serialization_test_3/mp_7.xml create mode 100644 unit_test/input/serialization_test_3/mp_8.xml create mode 100644 unit_test/input/serialization_test_3/mp_9.xml create mode 100644 unit_test/input/serialization_test_4/mp_1.xml create mode 100644 unit_test/input/serialization_test_4/mp_10.xml create mode 100644 unit_test/input/serialization_test_4/mp_11.xml create mode 100644 unit_test/input/serialization_test_4/mp_12.xml create mode 100644 unit_test/input/serialization_test_4/mp_13.xml create mode 100644 unit_test/input/serialization_test_4/mp_14.xml create mode 100644 unit_test/input/serialization_test_4/mp_15.xml create mode 100644 unit_test/input/serialization_test_4/mp_16.xml create mode 100644 unit_test/input/serialization_test_4/mp_17.xml create mode 100644 unit_test/input/serialization_test_4/mp_18.xml create mode 100644 unit_test/input/serialization_test_4/mp_19.xml create mode 100644 unit_test/input/serialization_test_4/mp_2.xml create mode 100644 unit_test/input/serialization_test_4/mp_20.xml create mode 100644 unit_test/input/serialization_test_4/mp_21.xml create mode 100644 unit_test/input/serialization_test_4/mp_22.xml create mode 100644 unit_test/input/serialization_test_4/mp_23.xml create mode 100644 unit_test/input/serialization_test_4/mp_24.xml create mode 100644 unit_test/input/serialization_test_4/mp_25.xml create mode 100644 
unit_test/input/serialization_test_4/mp_26.xml create mode 100644 unit_test/input/serialization_test_4/mp_27.xml create mode 100644 unit_test/input/serialization_test_4/mp_28.xml create mode 100644 unit_test/input/serialization_test_4/mp_29.xml create mode 100644 unit_test/input/serialization_test_4/mp_3.xml create mode 100644 unit_test/input/serialization_test_4/mp_30.xml create mode 100644 unit_test/input/serialization_test_4/mp_31.xml create mode 100644 unit_test/input/serialization_test_4/mp_32.xml create mode 100644 unit_test/input/serialization_test_4/mp_33.xml create mode 100644 unit_test/input/serialization_test_4/mp_34.xml create mode 100644 unit_test/input/serialization_test_4/mp_35.xml create mode 100644 unit_test/input/serialization_test_4/mp_36.xml create mode 100644 unit_test/input/serialization_test_4/mp_37.xml create mode 100644 unit_test/input/serialization_test_4/mp_38.xml create mode 100644 unit_test/input/serialization_test_4/mp_39.xml create mode 100644 unit_test/input/serialization_test_4/mp_4.xml create mode 100644 unit_test/input/serialization_test_4/mp_40.xml create mode 100644 unit_test/input/serialization_test_4/mp_41.xml create mode 100644 unit_test/input/serialization_test_4/mp_42.xml create mode 100644 unit_test/input/serialization_test_4/mp_43.xml create mode 100644 unit_test/input/serialization_test_4/mp_44.xml create mode 100644 unit_test/input/serialization_test_4/mp_45.xml create mode 100644 unit_test/input/serialization_test_4/mp_46.xml create mode 100644 unit_test/input/serialization_test_4/mp_47.xml create mode 100644 unit_test/input/serialization_test_4/mp_48.xml create mode 100644 unit_test/input/serialization_test_4/mp_49.xml create mode 100644 unit_test/input/serialization_test_4/mp_5.xml create mode 100644 unit_test/input/serialization_test_4/mp_50.xml create mode 100644 unit_test/input/serialization_test_4/mp_51.xml create mode 100644 unit_test/input/serialization_test_4/mp_6.xml create mode 100644 
unit_test/input/serialization_test_4/mp_7.xml create mode 100644 unit_test/input/serialization_test_4/mp_8.xml create mode 100644 unit_test/input/serialization_test_4/mp_9.xml diff --git a/src/logic/dep_resource_conflict_check.cc b/src/logic/dep_resource_conflict_check.cc index 50c2bfc..b8ad798 100644 --- a/src/logic/dep_resource_conflict_check.cc +++ b/src/logic/dep_resource_conflict_check.cc @@ -87,13 +87,15 @@ void dep_resource_conflict_check::set_params(const InputParameter *configure_int local_result = init_interface(&l_ip); - if (coredynp.core_ty == Inorder) + if (coredynp.core_ty == Inorder) { compare_bits += 16 + 8 + 8; // TODO: opcode bits + log(shared resources) + // REG TAG BITS-->opcode comparator - else + } + else { compare_bits += 16 + 8 + 8; + } - conflict_check_power(); + conflict_check_power(); double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; power.writeOp.dynamic *= sckRation; diff --git a/src/noc.cc b/src/noc.cc index 982263a..8dea92b 100644 --- a/src/noc.cc +++ b/src/noc.cc @@ -223,12 +223,12 @@ void NoC::computePower(bool cp) { if (!cp) { set_pppm(pppm_t, 1 * M, 1, 1, 1); // reset traffic pattern router.power = router.power * pppm_t; - set_pppm(pppm_t, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); } + set_pppm(pppm_t, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes, + nocdynp.total_nodes); power = power + router.power * pppm_t; } if (link_bus_exist) { diff --git a/unit_test/golden/ARM_A9_2GHz.golden b/unit_test/golden/basic_test_1/ARM_A9_2GHz.golden similarity index 100% rename from unit_test/golden/ARM_A9_2GHz.golden rename to unit_test/golden/basic_test_1/ARM_A9_2GHz.golden diff --git a/unit_test/golden/ARM_A9_2GHz_withIOC.golden b/unit_test/golden/basic_test_1/ARM_A9_2GHz_withIOC.golden similarity index 100% rename from unit_test/golden/ARM_A9_2GHz_withIOC.golden rename to unit_test/golden/basic_test_1/ARM_A9_2GHz_withIOC.golden diff --git 
a/unit_test/golden/Alpha21364.golden b/unit_test/golden/basic_test_1/Alpha21364.golden similarity index 100% rename from unit_test/golden/Alpha21364.golden rename to unit_test/golden/basic_test_1/Alpha21364.golden diff --git a/unit_test/golden/Niagara1.golden b/unit_test/golden/basic_test_1/Niagara1.golden similarity index 100% rename from unit_test/golden/Niagara1.golden rename to unit_test/golden/basic_test_1/Niagara1.golden diff --git a/unit_test/golden/Niagara1_sharing_DC.golden b/unit_test/golden/basic_test_1/Niagara1_sharing_DC.golden similarity index 100% rename from unit_test/golden/Niagara1_sharing_DC.golden rename to unit_test/golden/basic_test_1/Niagara1_sharing_DC.golden diff --git a/unit_test/golden/Niagara1_sharing_SBT.golden b/unit_test/golden/basic_test_1/Niagara1_sharing_SBT.golden similarity index 100% rename from unit_test/golden/Niagara1_sharing_SBT.golden rename to unit_test/golden/basic_test_1/Niagara1_sharing_SBT.golden diff --git a/unit_test/golden/Niagara1_sharing_ST.golden b/unit_test/golden/basic_test_1/Niagara1_sharing_ST.golden similarity index 100% rename from unit_test/golden/Niagara1_sharing_ST.golden rename to unit_test/golden/basic_test_1/Niagara1_sharing_ST.golden diff --git a/unit_test/golden/Niagara2.golden b/unit_test/golden/basic_test_1/Niagara2.golden similarity index 100% rename from unit_test/golden/Niagara2.golden rename to unit_test/golden/basic_test_1/Niagara2.golden diff --git a/unit_test/golden/Penryn.golden b/unit_test/golden/basic_test_1/Penryn.golden similarity index 100% rename from unit_test/golden/Penryn.golden rename to unit_test/golden/basic_test_1/Penryn.golden diff --git a/unit_test/golden/Xeon.golden b/unit_test/golden/basic_test_1/Xeon.golden similarity index 100% rename from unit_test/golden/Xeon.golden rename to unit_test/golden/basic_test_1/Xeon.golden diff --git a/unit_test/input/ARM_A9_2GHz.xml b/unit_test/input/basic_test_1/ARM_A9_2GHz.xml similarity index 100% rename from 
unit_test/input/ARM_A9_2GHz.xml rename to unit_test/input/basic_test_1/ARM_A9_2GHz.xml diff --git a/unit_test/input/ARM_A9_2GHz_withIOC.xml b/unit_test/input/basic_test_1/ARM_A9_2GHz_withIOC.xml similarity index 100% rename from unit_test/input/ARM_A9_2GHz_withIOC.xml rename to unit_test/input/basic_test_1/ARM_A9_2GHz_withIOC.xml diff --git a/unit_test/input/Alpha21364.xml b/unit_test/input/basic_test_1/Alpha21364.xml similarity index 100% rename from unit_test/input/Alpha21364.xml rename to unit_test/input/basic_test_1/Alpha21364.xml diff --git a/unit_test/input/Niagara1.xml b/unit_test/input/basic_test_1/Niagara1.xml similarity index 100% rename from unit_test/input/Niagara1.xml rename to unit_test/input/basic_test_1/Niagara1.xml diff --git a/unit_test/input/Niagara1_sharing_DC.xml b/unit_test/input/basic_test_1/Niagara1_sharing_DC.xml similarity index 100% rename from unit_test/input/Niagara1_sharing_DC.xml rename to unit_test/input/basic_test_1/Niagara1_sharing_DC.xml diff --git a/unit_test/input/Niagara1_sharing_SBT.xml b/unit_test/input/basic_test_1/Niagara1_sharing_SBT.xml similarity index 100% rename from unit_test/input/Niagara1_sharing_SBT.xml rename to unit_test/input/basic_test_1/Niagara1_sharing_SBT.xml diff --git a/unit_test/input/Niagara1_sharing_ST.xml b/unit_test/input/basic_test_1/Niagara1_sharing_ST.xml similarity index 100% rename from unit_test/input/Niagara1_sharing_ST.xml rename to unit_test/input/basic_test_1/Niagara1_sharing_ST.xml diff --git a/unit_test/input/Niagara2.xml b/unit_test/input/basic_test_1/Niagara2.xml similarity index 100% rename from unit_test/input/Niagara2.xml rename to unit_test/input/basic_test_1/Niagara2.xml diff --git a/unit_test/input/Penryn.xml b/unit_test/input/basic_test_1/Penryn.xml similarity index 100% rename from unit_test/input/Penryn.xml rename to unit_test/input/basic_test_1/Penryn.xml diff --git a/unit_test/input/Xeon.xml b/unit_test/input/basic_test_1/Xeon.xml similarity index 100% rename from 
unit_test/input/Xeon.xml rename to unit_test/input/basic_test_1/Xeon.xml diff --git a/unit_test/input/serialization_test_1/mp_1.xml b/unit_test/input/serialization_test_1/mp_1.xml new file mode 100644 index 0000000..1a4f24d --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_1.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_10.xml b/unit_test/input/serialization_test_1/mp_10.xml new file mode 100644 index 0000000..72b1b68 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_10.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_11.xml b/unit_test/input/serialization_test_1/mp_11.xml new file mode 100644 index 0000000..52548ad --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_11.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/unit_test/input/serialization_test_1/mp_12.xml b/unit_test/input/serialization_test_1/mp_12.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_12.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_13.xml b/unit_test/input/serialization_test_1/mp_13.xml new file mode 100644 index 0000000..6670a1f --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_13.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_14.xml b/unit_test/input/serialization_test_1/mp_14.xml new file mode 100644 index 0000000..4bc40f6 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_14.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_15.xml 
b/unit_test/input/serialization_test_1/mp_15.xml new file mode 100644 index 0000000..a78603c --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_15.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_16.xml b/unit_test/input/serialization_test_1/mp_16.xml new file mode 100644 index 0000000..5d7a10f --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_16.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_17.xml b/unit_test/input/serialization_test_1/mp_17.xml new file mode 100644 index 0000000..e60ed8f --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_17.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_18.xml b/unit_test/input/serialization_test_1/mp_18.xml new file mode 100644 index 
0000000..7ddaf90 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_18.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_19.xml b/unit_test/input/serialization_test_1/mp_19.xml new file mode 100644 index 0000000..e49ca73 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_19.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_2.xml b/unit_test/input/serialization_test_1/mp_2.xml new file mode 100644 index 0000000..e316bf8 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_2.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_20.xml b/unit_test/input/serialization_test_1/mp_20.xml new file mode 100644 index 0000000..13ae2fe --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_20.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_21.xml b/unit_test/input/serialization_test_1/mp_21.xml new file mode 100644 index 0000000..c9ad9c1 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_21.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_22.xml b/unit_test/input/serialization_test_1/mp_22.xml new file mode 100644 index 0000000..3f99521 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_22.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_23.xml b/unit_test/input/serialization_test_1/mp_23.xml new file mode 100644 index 0000000..2080b16 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_23.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_24.xml b/unit_test/input/serialization_test_1/mp_24.xml new file mode 100644 index 0000000..9ead306 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_24.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_25.xml b/unit_test/input/serialization_test_1/mp_25.xml new file mode 100644 index 0000000..ecf0eb1 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_25.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_26.xml b/unit_test/input/serialization_test_1/mp_26.xml new file mode 100644 index 0000000..c498433 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_26.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_27.xml b/unit_test/input/serialization_test_1/mp_27.xml new file mode 100644 index 0000000..c7cbdae --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_27.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_28.xml b/unit_test/input/serialization_test_1/mp_28.xml new file mode 100644 index 0000000..ecbe232 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_28.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_29.xml b/unit_test/input/serialization_test_1/mp_29.xml new file mode 100644 index 0000000..9ad965c --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_29.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_3.xml b/unit_test/input/serialization_test_1/mp_3.xml new file mode 100644 index 0000000..1f65fba --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_3.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_30.xml b/unit_test/input/serialization_test_1/mp_30.xml new file mode 100644 index 0000000..d4da444 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_30.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_31.xml b/unit_test/input/serialization_test_1/mp_31.xml new file mode 100644 index 0000000..bdb5453 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_31.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_32.xml b/unit_test/input/serialization_test_1/mp_32.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_32.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_33.xml b/unit_test/input/serialization_test_1/mp_33.xml new file mode 100644 index 0000000..13c4b79 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_33.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_34.xml b/unit_test/input/serialization_test_1/mp_34.xml new file mode 100644 index 0000000..ed59f6a --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_34.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_35.xml b/unit_test/input/serialization_test_1/mp_35.xml new file mode 100644 index 0000000..4ea0153 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_35.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/unit_test/input/serialization_test_1/mp_36.xml b/unit_test/input/serialization_test_1/mp_36.xml new file mode 100644 index 0000000..658163c --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_36.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_37.xml b/unit_test/input/serialization_test_1/mp_37.xml new file mode 100644 index 0000000..14bdb76 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_37.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_38.xml b/unit_test/input/serialization_test_1/mp_38.xml new file mode 100644 index 0000000..42fa9e2 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_38.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_39.xml 
b/unit_test/input/serialization_test_1/mp_39.xml new file mode 100644 index 0000000..8c7b187 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_39.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_4.xml b/unit_test/input/serialization_test_1/mp_4.xml new file mode 100644 index 0000000..1779bf8 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_4.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_40.xml b/unit_test/input/serialization_test_1/mp_40.xml new file mode 100644 index 0000000..7b5eb37 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_40.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_41.xml b/unit_test/input/serialization_test_1/mp_41.xml new file mode 100644 index 
0000000..751eac9 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_41.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_42.xml b/unit_test/input/serialization_test_1/mp_42.xml new file mode 100644 index 0000000..8ded793 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_42.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_43.xml b/unit_test/input/serialization_test_1/mp_43.xml new file mode 100644 index 0000000..fbf5288 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_43.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_44.xml b/unit_test/input/serialization_test_1/mp_44.xml new file mode 100644 index 0000000..6745412 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_44.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_45.xml b/unit_test/input/serialization_test_1/mp_45.xml new file mode 100644 index 0000000..f426bd9 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_45.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_46.xml b/unit_test/input/serialization_test_1/mp_46.xml new file mode 100644 index 0000000..a60d390 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_46.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_47.xml b/unit_test/input/serialization_test_1/mp_47.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_47.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_48.xml b/unit_test/input/serialization_test_1/mp_48.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_48.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_49.xml b/unit_test/input/serialization_test_1/mp_49.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_49.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_5.xml b/unit_test/input/serialization_test_1/mp_5.xml new file mode 100644 index 0000000..625b71e --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_5.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_50.xml b/unit_test/input/serialization_test_1/mp_50.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_50.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_51.xml b/unit_test/input/serialization_test_1/mp_51.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_51.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_6.xml b/unit_test/input/serialization_test_1/mp_6.xml new file mode 100644 index 0000000..e20bd7c --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_6.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_7.xml b/unit_test/input/serialization_test_1/mp_7.xml new file mode 100644 index 0000000..d28faea --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_7.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_8.xml b/unit_test/input/serialization_test_1/mp_8.xml new file mode 100644 index 0000000..a92aa52 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_8.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_1/mp_9.xml b/unit_test/input/serialization_test_1/mp_9.xml new file mode 100644 index 0000000..d73e760 --- /dev/null +++ b/unit_test/input/serialization_test_1/mp_9.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_1.xml b/unit_test/input/serialization_test_2/mp_1.xml new file mode 100644 index 0000000..8c006bd --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_1.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_10.xml b/unit_test/input/serialization_test_2/mp_10.xml new file mode 100644 index 0000000..1aa0363 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_10.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_11.xml b/unit_test/input/serialization_test_2/mp_11.xml new file mode 100644 index 0000000..ca1749f --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_11.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_12.xml b/unit_test/input/serialization_test_2/mp_12.xml new file mode 100644 index 0000000..d90bc66 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_12.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_13.xml 
b/unit_test/input/serialization_test_2/mp_13.xml new file mode 100644 index 0000000..c66747f --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_13.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_14.xml b/unit_test/input/serialization_test_2/mp_14.xml new file mode 100644 index 0000000..57f518a --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_14.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_15.xml b/unit_test/input/serialization_test_2/mp_15.xml new file mode 100644 index 0000000..f0b1741 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_15.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_16.xml b/unit_test/input/serialization_test_2/mp_16.xml new file mode 100644 index 
0000000..102212d --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_16.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_17.xml b/unit_test/input/serialization_test_2/mp_17.xml new file mode 100644 index 0000000..1708d62 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_17.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_18.xml b/unit_test/input/serialization_test_2/mp_18.xml new file mode 100644 index 0000000..c09ebe2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_18.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_19.xml b/unit_test/input/serialization_test_2/mp_19.xml new file mode 100644 index 0000000..90c29f3 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_19.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_2.xml b/unit_test/input/serialization_test_2/mp_2.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_2.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_20.xml b/unit_test/input/serialization_test_2/mp_20.xml new file mode 100644 index 0000000..a963db0 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_20.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_21.xml b/unit_test/input/serialization_test_2/mp_21.xml new file mode 100644 index 0000000..f8b8eba --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_21.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_22.xml b/unit_test/input/serialization_test_2/mp_22.xml new file mode 100644 index 0000000..d0aef4e --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_22.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_23.xml b/unit_test/input/serialization_test_2/mp_23.xml new file mode 100644 index 0000000..205caf3 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_23.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_24.xml b/unit_test/input/serialization_test_2/mp_24.xml new file mode 100644 index 0000000..caef4a0 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_24.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_25.xml b/unit_test/input/serialization_test_2/mp_25.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_25.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_26.xml b/unit_test/input/serialization_test_2/mp_26.xml new file mode 100644 index 0000000..2738876 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_26.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_27.xml b/unit_test/input/serialization_test_2/mp_27.xml new file mode 100644 index 0000000..b5577b4 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_27.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_28.xml b/unit_test/input/serialization_test_2/mp_28.xml new file mode 100644 index 0000000..c6b85ab --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_28.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_29.xml b/unit_test/input/serialization_test_2/mp_29.xml new file mode 100644 index 0000000..0a9a2d6 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_29.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_3.xml b/unit_test/input/serialization_test_2/mp_3.xml new file mode 100644 index 0000000..c51e0ad --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_3.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_30.xml b/unit_test/input/serialization_test_2/mp_30.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_30.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_31.xml b/unit_test/input/serialization_test_2/mp_31.xml new file mode 100644 index 0000000..852fe21 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_31.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_32.xml b/unit_test/input/serialization_test_2/mp_32.xml new file mode 100644 index 0000000..1d1baa8 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_32.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_33.xml b/unit_test/input/serialization_test_2/mp_33.xml new file mode 100644 index 0000000..9c193ed --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_33.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_34.xml 
b/unit_test/input/serialization_test_2/mp_34.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_34.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_35.xml b/unit_test/input/serialization_test_2/mp_35.xml new file mode 100644 index 0000000..8032942 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_35.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_36.xml b/unit_test/input/serialization_test_2/mp_36.xml new file mode 100644 index 0000000..509d89d --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_36.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_37.xml b/unit_test/input/serialization_test_2/mp_37.xml new file mode 100644 index 
0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_37.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_38.xml b/unit_test/input/serialization_test_2/mp_38.xml new file mode 100644 index 0000000..a60d390 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_38.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_39.xml b/unit_test/input/serialization_test_2/mp_39.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_39.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_4.xml b/unit_test/input/serialization_test_2/mp_4.xml new file mode 100644 index 0000000..38e3cb0 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_4.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_40.xml b/unit_test/input/serialization_test_2/mp_40.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_40.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_41.xml b/unit_test/input/serialization_test_2/mp_41.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_41.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_42.xml b/unit_test/input/serialization_test_2/mp_42.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_42.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_43.xml b/unit_test/input/serialization_test_2/mp_43.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_43.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_44.xml b/unit_test/input/serialization_test_2/mp_44.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_44.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_45.xml b/unit_test/input/serialization_test_2/mp_45.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_45.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_46.xml b/unit_test/input/serialization_test_2/mp_46.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_46.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_47.xml b/unit_test/input/serialization_test_2/mp_47.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_47.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_48.xml b/unit_test/input/serialization_test_2/mp_48.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_48.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_49.xml b/unit_test/input/serialization_test_2/mp_49.xml new file mode 100644 index 0000000..af3ef73 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_49.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_5.xml b/unit_test/input/serialization_test_2/mp_5.xml new file mode 100644 index 0000000..d9ecb96 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_5.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_50.xml b/unit_test/input/serialization_test_2/mp_50.xml new file mode 100644 index 0000000..c2617b2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_50.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_51.xml b/unit_test/input/serialization_test_2/mp_51.xml new file mode 100644 index 0000000..e48c198 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_51.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_6.xml b/unit_test/input/serialization_test_2/mp_6.xml new file mode 100644 index 0000000..913f9c2 --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_6.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_7.xml b/unit_test/input/serialization_test_2/mp_7.xml new file mode 100644 index 0000000..be4660d --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_7.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_8.xml b/unit_test/input/serialization_test_2/mp_8.xml new file mode 100644 index 0000000..6ae72ab --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_8.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_2/mp_9.xml 
b/unit_test/input/serialization_test_2/mp_9.xml new file mode 100644 index 0000000..c96a58e --- /dev/null +++ b/unit_test/input/serialization_test_2/mp_9.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_1.xml b/unit_test/input/serialization_test_3/mp_1.xml new file mode 100644 index 0000000..9533b58 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_1.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_10.xml b/unit_test/input/serialization_test_3/mp_10.xml new file mode 100644 index 0000000..33a8f14 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_10.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_11.xml b/unit_test/input/serialization_test_3/mp_11.xml new file mode 100644 index 
0000000..5649ac8 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_11.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_12.xml b/unit_test/input/serialization_test_3/mp_12.xml new file mode 100644 index 0000000..03307b2 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_12.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_13.xml b/unit_test/input/serialization_test_3/mp_13.xml new file mode 100644 index 0000000..bb8e73f --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_13.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_14.xml b/unit_test/input/serialization_test_3/mp_14.xml new file mode 100644 index 0000000..a4a2ca7 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_14.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_15.xml b/unit_test/input/serialization_test_3/mp_15.xml new file mode 100644 index 0000000..95e3aad --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_15.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_16.xml b/unit_test/input/serialization_test_3/mp_16.xml new file mode 100644 index 0000000..05fc857 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_16.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_17.xml b/unit_test/input/serialization_test_3/mp_17.xml new file mode 100644 index 0000000..b32f8c7 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_17.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_18.xml b/unit_test/input/serialization_test_3/mp_18.xml new file mode 100644 index 0000000..b4d7429 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_18.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_19.xml b/unit_test/input/serialization_test_3/mp_19.xml new file mode 100644 index 0000000..57e623e --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_19.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_2.xml b/unit_test/input/serialization_test_3/mp_2.xml new file mode 100644 index 0000000..cb51e36 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_2.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_20.xml b/unit_test/input/serialization_test_3/mp_20.xml new file mode 100644 index 0000000..2ad3540 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_20.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_21.xml b/unit_test/input/serialization_test_3/mp_21.xml new file mode 100644 index 0000000..08fba69 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_21.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_22.xml b/unit_test/input/serialization_test_3/mp_22.xml new file mode 100644 index 0000000..24a4b11 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_22.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_23.xml b/unit_test/input/serialization_test_3/mp_23.xml new file mode 100644 index 0000000..50ef969 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_23.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_24.xml b/unit_test/input/serialization_test_3/mp_24.xml new file mode 100644 index 0000000..b8c2501 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_24.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_25.xml b/unit_test/input/serialization_test_3/mp_25.xml new file mode 100644 index 0000000..b519398 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_25.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_26.xml b/unit_test/input/serialization_test_3/mp_26.xml new file mode 100644 index 0000000..b41d7d4 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_26.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_27.xml b/unit_test/input/serialization_test_3/mp_27.xml new file mode 100644 index 0000000..e834b6f --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_27.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_28.xml b/unit_test/input/serialization_test_3/mp_28.xml new file mode 100644 index 0000000..ad0bbb2 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_28.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_29.xml b/unit_test/input/serialization_test_3/mp_29.xml new file mode 100644 index 0000000..c7b2743 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_29.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/unit_test/input/serialization_test_3/mp_3.xml b/unit_test/input/serialization_test_3/mp_3.xml new file mode 100644 index 0000000..30d2596 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_3.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_30.xml b/unit_test/input/serialization_test_3/mp_30.xml new file mode 100644 index 0000000..b2e2f9a --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_30.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_31.xml b/unit_test/input/serialization_test_3/mp_31.xml new file mode 100644 index 0000000..a9fc3f2 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_31.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_32.xml 
b/unit_test/input/serialization_test_3/mp_32.xml new file mode 100644 index 0000000..0b0da8e --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_32.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_33.xml b/unit_test/input/serialization_test_3/mp_33.xml new file mode 100644 index 0000000..b19f4a7 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_33.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_34.xml b/unit_test/input/serialization_test_3/mp_34.xml new file mode 100644 index 0000000..14345a1 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_34.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_35.xml b/unit_test/input/serialization_test_3/mp_35.xml new file mode 100644 index 
0000000..a3753a8 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_35.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_36.xml b/unit_test/input/serialization_test_3/mp_36.xml new file mode 100644 index 0000000..abbfb62 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_36.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_37.xml b/unit_test/input/serialization_test_3/mp_37.xml new file mode 100644 index 0000000..68bede4 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_37.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_38.xml b/unit_test/input/serialization_test_3/mp_38.xml new file mode 100644 index 0000000..6f5614c --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_38.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_39.xml b/unit_test/input/serialization_test_3/mp_39.xml new file mode 100644 index 0000000..cc31cba --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_39.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_4.xml b/unit_test/input/serialization_test_3/mp_4.xml new file mode 100644 index 0000000..010d09c --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_4.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_40.xml b/unit_test/input/serialization_test_3/mp_40.xml new file mode 100644 index 0000000..1b221ae --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_40.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_41.xml b/unit_test/input/serialization_test_3/mp_41.xml new file mode 100644 index 0000000..2aa845d --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_41.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_42.xml b/unit_test/input/serialization_test_3/mp_42.xml new file mode 100644 index 0000000..eeb5266 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_42.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_43.xml b/unit_test/input/serialization_test_3/mp_43.xml new file mode 100644 index 0000000..fee74f6 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_43.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_44.xml b/unit_test/input/serialization_test_3/mp_44.xml new file mode 100644 index 0000000..f046816 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_44.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_45.xml b/unit_test/input/serialization_test_3/mp_45.xml new file mode 100644 index 0000000..3276a6e --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_45.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_46.xml b/unit_test/input/serialization_test_3/mp_46.xml new file mode 100644 index 0000000..75039cb --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_46.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_47.xml b/unit_test/input/serialization_test_3/mp_47.xml new file mode 100644 index 0000000..1ffea81 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_47.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_48.xml b/unit_test/input/serialization_test_3/mp_48.xml new file mode 100644 index 0000000..f8d269d --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_48.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_49.xml b/unit_test/input/serialization_test_3/mp_49.xml new file mode 100644 index 0000000..91abeb7 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_49.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_5.xml b/unit_test/input/serialization_test_3/mp_5.xml new file mode 100644 index 0000000..e234a84 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_5.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_50.xml b/unit_test/input/serialization_test_3/mp_50.xml new file mode 100644 index 0000000..65b494b --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_50.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_51.xml b/unit_test/input/serialization_test_3/mp_51.xml new file mode 100644 index 0000000..ab07584 --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_51.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_6.xml b/unit_test/input/serialization_test_3/mp_6.xml new file mode 100644 index 0000000..509d89d --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_6.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_7.xml 
b/unit_test/input/serialization_test_3/mp_7.xml new file mode 100644 index 0000000..be9c96a --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_7.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_8.xml b/unit_test/input/serialization_test_3/mp_8.xml new file mode 100644 index 0000000..b3d137d --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_8.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_3/mp_9.xml b/unit_test/input/serialization_test_3/mp_9.xml new file mode 100644 index 0000000..a24683b --- /dev/null +++ b/unit_test/input/serialization_test_3/mp_9.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_1.xml b/unit_test/input/serialization_test_4/mp_1.xml new file mode 100644 index 0000000..0047764 --- 
/dev/null +++ b/unit_test/input/serialization_test_4/mp_1.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_10.xml b/unit_test/input/serialization_test_4/mp_10.xml new file mode 100644 index 0000000..4964b9c --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_10.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_11.xml b/unit_test/input/serialization_test_4/mp_11.xml new file mode 100644 index 0000000..a24aa22 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_11.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_12.xml b/unit_test/input/serialization_test_4/mp_12.xml new file mode 100644 index 0000000..bdc0cc5 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_12.xml @@ -0,0 +1,533 @@ + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_13.xml b/unit_test/input/serialization_test_4/mp_13.xml new file mode 100644 index 0000000..555b9d6 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_13.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_14.xml b/unit_test/input/serialization_test_4/mp_14.xml new file mode 100644 index 0000000..91f9afe --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_14.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_15.xml b/unit_test/input/serialization_test_4/mp_15.xml new file mode 100644 index 0000000..60acfea --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_15.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_16.xml b/unit_test/input/serialization_test_4/mp_16.xml new file mode 100644 index 0000000..0438fd0 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_16.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_17.xml b/unit_test/input/serialization_test_4/mp_17.xml new file mode 100644 index 0000000..6e2ca37 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_17.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_18.xml b/unit_test/input/serialization_test_4/mp_18.xml new file mode 100644 index 0000000..cc22dc1 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_18.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_19.xml b/unit_test/input/serialization_test_4/mp_19.xml new file mode 100644 index 0000000..a056d59 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_19.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_2.xml b/unit_test/input/serialization_test_4/mp_2.xml new file mode 100644 index 0000000..8ebcfff --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_2.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_20.xml b/unit_test/input/serialization_test_4/mp_20.xml new file mode 100644 index 0000000..4f2a13e --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_20.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_21.xml b/unit_test/input/serialization_test_4/mp_21.xml new file mode 100644 index 0000000..7a4207e --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_21.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_22.xml b/unit_test/input/serialization_test_4/mp_22.xml new file mode 100644 index 0000000..43d42e9 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_22.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_23.xml b/unit_test/input/serialization_test_4/mp_23.xml new file mode 100644 index 0000000..d7ff2b8 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_23.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_24.xml b/unit_test/input/serialization_test_4/mp_24.xml new file mode 100644 index 0000000..575427d --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_24.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + diff --git a/unit_test/input/serialization_test_4/mp_25.xml b/unit_test/input/serialization_test_4/mp_25.xml new file mode 100644 index 0000000..c87ef79 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_25.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_26.xml b/unit_test/input/serialization_test_4/mp_26.xml new file mode 100644 index 0000000..96bf93a --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_26.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_27.xml b/unit_test/input/serialization_test_4/mp_27.xml new file mode 100644 index 0000000..aaa7b13 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_27.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_28.xml 
b/unit_test/input/serialization_test_4/mp_28.xml new file mode 100644 index 0000000..255cf64 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_28.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_29.xml b/unit_test/input/serialization_test_4/mp_29.xml new file mode 100644 index 0000000..0d75565 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_29.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_3.xml b/unit_test/input/serialization_test_4/mp_3.xml new file mode 100644 index 0000000..5a8b2b4 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_3.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_30.xml b/unit_test/input/serialization_test_4/mp_30.xml new file mode 100644 index 
0000000..fca6f0e --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_30.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_31.xml b/unit_test/input/serialization_test_4/mp_31.xml new file mode 100644 index 0000000..3147eec --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_31.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_32.xml b/unit_test/input/serialization_test_4/mp_32.xml new file mode 100644 index 0000000..1858c7f --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_32.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_33.xml b/unit_test/input/serialization_test_4/mp_33.xml new file mode 100644 index 0000000..693d3cf --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_33.xml @@ 
-0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_34.xml b/unit_test/input/serialization_test_4/mp_34.xml new file mode 100644 index 0000000..727d5ab --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_34.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_35.xml b/unit_test/input/serialization_test_4/mp_35.xml new file mode 100644 index 0000000..4c7d573 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_35.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_36.xml b/unit_test/input/serialization_test_4/mp_36.xml new file mode 100644 index 0000000..bd1fde7 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_36.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_37.xml b/unit_test/input/serialization_test_4/mp_37.xml new file mode 100644 index 0000000..95b6b43 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_37.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_38.xml b/unit_test/input/serialization_test_4/mp_38.xml new file mode 100644 index 0000000..1858c7f --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_38.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_39.xml b/unit_test/input/serialization_test_4/mp_39.xml new file mode 100644 index 0000000..693d3cf --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_39.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_4.xml b/unit_test/input/serialization_test_4/mp_4.xml new file mode 100644 index 0000000..0cd7901 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_4.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_40.xml b/unit_test/input/serialization_test_4/mp_40.xml new file mode 100644 index 0000000..ac86da6 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_40.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_41.xml b/unit_test/input/serialization_test_4/mp_41.xml new file mode 100644 index 0000000..8a82360 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_41.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_42.xml b/unit_test/input/serialization_test_4/mp_42.xml new file mode 100644 index 0000000..661c3c8 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_42.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_43.xml b/unit_test/input/serialization_test_4/mp_43.xml new file mode 100644 index 0000000..407eb55 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_43.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_44.xml b/unit_test/input/serialization_test_4/mp_44.xml new file mode 100644 index 0000000..177cbd7 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_44.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_45.xml b/unit_test/input/serialization_test_4/mp_45.xml new file mode 100644 index 0000000..306fd42 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_45.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_46.xml b/unit_test/input/serialization_test_4/mp_46.xml new file mode 100644 index 0000000..49aa2f0 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_46.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_47.xml b/unit_test/input/serialization_test_4/mp_47.xml new file mode 100644 index 0000000..06609e8 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_47.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_48.xml b/unit_test/input/serialization_test_4/mp_48.xml new file mode 100644 index 0000000..1858c7f --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_48.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/unit_test/input/serialization_test_4/mp_49.xml b/unit_test/input/serialization_test_4/mp_49.xml new file mode 100644 index 0000000..693d3cf --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_49.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_5.xml b/unit_test/input/serialization_test_4/mp_5.xml new file mode 100644 index 0000000..ffb1e75 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_5.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_50.xml b/unit_test/input/serialization_test_4/mp_50.xml new file mode 100644 index 0000000..95b6b43 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_50.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_51.xml 
b/unit_test/input/serialization_test_4/mp_51.xml new file mode 100644 index 0000000..1858c7f --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_51.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_6.xml b/unit_test/input/serialization_test_4/mp_6.xml new file mode 100644 index 0000000..5699d66 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_6.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_7.xml b/unit_test/input/serialization_test_4/mp_7.xml new file mode 100644 index 0000000..dd5a9f4 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_7.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_8.xml b/unit_test/input/serialization_test_4/mp_8.xml new file mode 100644 index 0000000..0c33c62 
--- /dev/null +++ b/unit_test/input/serialization_test_4/mp_8.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/input/serialization_test_4/mp_9.xml b/unit_test/input/serialization_test_4/mp_9.xml new file mode 100644 index 0000000..c8c9ce7 --- /dev/null +++ b/unit_test/input/serialization_test_4/mp_9.xml @@ -0,0 +1,533 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index 1e3e339..6446a77 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -39,16 +39,16 @@ timeout_limit = 120.0 kill_flag = False -input_path = "./input" -output_path = "./output" -golden_path = "./golden" - -#parser = argparse.ArgumentParser() -#parser.add_argument('--input', type=str, default="", help="input path") -#parser.add_argument('--warmup', type=int, default=100000, help="time in nanoseconds of the warmup") -#parser.add_argument('--end', type=int, default=0, help="time in nanoseconds of end of the plot") -#args = parser.parse_args() +parser = argparse.ArgumentParser() +parser.add_argument('--input', type=str, default="./input/basic_test_1", help="Test Input Path") +parser.add_argument("--output", type=str, default="./output/basic_test_1", help="Test Output Path") +parser.add_argument("--golden", type=str, default="./golden/basic_test_1", help="Test Golden Path") +parser.add_argument("--serial", type=bool, default=False, help="Serial if true, Basic if false" +args = parser.parse_args() +input_path = args.input +output_path = args.output +golden_path = args.golden def print_info(info, *args): if verbose: @@ -161,7 +161,8 @@ def run_test_serializaiton_create(vector): if kill_flag: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 - if os.stat(os.path.join(output_path, vector + ".err")).st_size == 0: + if (os.stat(os.path.join(output_path, vector + ".err")).st_size == 0) and + 
(os.stat(os.path.join(output_path, vector + ".txt")).st_size > 0): print_pass(vector) return 0 else: @@ -169,11 +170,11 @@ def run_test_serializaiton_create(vector): return 0 -def run_test_serialization_restore(vector): +def run_test_serialization_restore(vector, sfile): global kill_flag kill_flag = False infile = os.path.join(input_path, vector + ".xml") - sname = os.path.join(output_path, vector + ".txt") + sname = os.path.join(output_path, sfile + ".txt") stdo = os.path.join(output_path, vector + ".out") stde = os.path.join(output_path, vector + ".err") with open(stdo, "w") as so, open(stde, "w") as se: @@ -214,19 +215,23 @@ def get_vectors(): print_info(start) vectors = get_vectors() print_info("Found " + str(len(vectors)) + " test vectors") - #for vector in vectors: - # if run_test_normal(vector) == 0: - # p += 1 - # else: - # f += 1 - #for vector in vectors: - # if run_test_serializaiton_create(vector) == 0: - # p += 1 - # else: - # f += 1 for vector in vectors: - if run_test_serialization_restore(vector) == 0: - p += 1 - else: - f += 1 + if not args.serial: + if run_test_normal(vector) == 0: + p += 1 + else: + f += 1 + else: + # Create a Serialized File: + for vector in vectors: + if run_test_serializaiton_create(vector) == 0: + p += 1 + else: + f += 1 + # Use Serialized File for Remainder of Tests: + for vector in vectors: + if run_test_serialization_restore(vector, vector[0]) == 0: + p += 1 + else: + f += 1 print_results(p, f, len(vectors)) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 0b99e79..8fb1695 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -26,6 +26,17 @@ print_info () { echo -e "[ $script_name ] $1" } +#-------------------------------------------------------------------- +# +# +# +#-------------------------------------------------------------------- +TESTS=("basic_test_1" + "serialization_test_1" + "serialization_test_2" + "serialization_test_3" + "serialization_test_4") + 
#-------------------------------------------------------------------- # Output Directories # ___ _ _ _____ ____ _ _ _____ ____ ___ ____ @@ -40,9 +51,12 @@ if [ ! -d $OUTPUT ]; then print_info "Creating $OUTPUT" mkdir -p $OUTPUT #else - #print_info "Cleaning $OUTPUT" - #rm -f $OUTPUT/* +# print_info "Cleaning $OUTPUT" +# rm -f $OUTPUT/* fi +for test_set in ${TESTS[@]}; do + mkdir -p $OUTPUT/$test_set +done #-------------------------------------------------------------------- # Run Tests From 47df78e2860ab3adcf7362edb1b1b7c43ca26e9d Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 23 Jun 2020 14:44:50 -0500 Subject: [PATCH 44/59] preliminary IF unit changes --- src/core/instfetch.cc | 979 +++----------------------------------- src/core/instfetch.h | 82 +--- src/logic/inst_decoder.cc | 8 +- src/logic/inst_decoder.h | 4 +- 4 files changed, 87 insertions(+), 986 deletions(-) diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 3fac327..8d0016e 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -1,821 +1,3 @@ -// /***************************************************************************** -// * McPAT -// * SOFTWARE LICENSE AGREEMENT -// * Copyright 2012 Hewlett-Packard Development Company, L.P. 
-// * All Rights Reserved -// * -// * Redistribution and use in source and binary forms, with or without -// * modification, are permitted provided that the following conditions are -// * met: redistributions of source code must retain the above copyright -// * notice, this list of conditions and the following disclaimer; -// * redistributions in binary form must reproduce the above copyright -// * notice, this list of conditions and the following disclaimer in the -// * documentation and/or other materials provided with the distribution; -// * neither the name of the copyright holders nor the names of its -// * contributors may be used to endorse or promote products derived from -// * this software without specific prior written permission. - -// * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” -// * -// ***************************************************************************/ - -// #include "instfetch.h" - -// #include "XML_Parse.h" -// #include "basic_circuit.h" -// #include "const.h" -// #include "io.h" -// #include "parameter.h" - -// #include -// #include -// #include -// #include -// #include - -// InstFetchU::InstFetchU(const ParseXML *XML_interface, -// int ithCore_, -// InputParameter *interface_ip_, -// const CoreDynParam &dyn_p_, -// bool exist_) -// : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), -// coredynp(dyn_p_), IB(0), BTB(0), exist(exist_) { -// if (!exist) -// return; -// int idx, tag, data, size, line, assoc, banks; -// bool debug = false, is_default = true; - -// clockRate = coredynp.clockRate; -// executionTime = coredynp.executionTime; -// cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; -// // Assuming all L1 caches are virtually idxed physically tagged. -// // cache - -// size = (int)XML->sys.core[ithCore].icache.icache_config[0]; -// line = (int)XML->sys.core[ithCore].icache.icache_config[1]; -// assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; -// banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; -// idx = debug ? 9 : int(ceil(log2(size / line / assoc))); -// tag = debug ? 
51 -// : (int)XML->sys.physical_address_width - idx - -// int(ceil(log2(line))) + EXTRA_TAG_BITS; -// interface_ip.specific_tag = 1; -// interface_ip.tag_w = tag; -// interface_ip.cache_sz = -// debug ? 32768 : (int)XML->sys.core[ithCore].icache.icache_config[0]; -// interface_ip.line_sz = -// debug ? 64 : (int)XML->sys.core[ithCore].icache.icache_config[1]; -// interface_ip.assoc = -// debug ? 8 : (int)XML->sys.core[ithCore].icache.icache_config[2]; -// interface_ip.nbanks = -// debug ? 1 : (int)XML->sys.core[ithCore].icache.icache_config[3]; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = -// 0; // debug?0:XML->sys.core[ithCore].icache.icache_config[5]; -// interface_ip.throughput = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; -// interface_ip.latency = -// debug ? 3.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; -// interface_ip.is_cache = true; -// interface_ip.pure_cam = false; -// interface_ip.pure_ram = false; -// // interface_ip.obj_func_dyn_energy = 0; -// // interface_ip.obj_func_dyn_power = 0; -// // interface_ip.obj_func_leak_power = 0; -// // interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = -// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; -// interface_ip.num_rd_ports = 0; -// interface_ip.num_wr_ports = 0; -// interface_ip.num_se_rd_ports = 0; -// icache.caches = new ArrayST(&interface_ip, -// "icache", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// scktRatio = g_tp.sckt_co_eff; -// chip_PR_overhead = g_tp.chip_layout_overhead; -// macro_PR_overhead = g_tp.macro_layout_overhead; -// icache.area.set_area(icache.area.get_area() + -// icache.caches->local_result.area); -// area.set_area(area.get_area() + icache.caches->local_result.area); -// // output_data_csv(icache.caches.local_result); - -// /* -// *iCache controllers -// *miss buffer Each MSHR contains enough state -// *to handle one or more accesses of any type to a single memory line. -// *Due to the generality of the MSHR mechanism, -// *the amount of state involved is non-trivial: -// *including the address, pointers to the cache entry and destination register, -// *written data, and various other pieces of state. -// */ -// interface_ip.num_search_ports = -// debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; -// tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; -// data = (XML->sys.physical_address_width) + int(ceil(log2(size / line))) + -// icache.caches->l_ip.line_sz * 8; -// interface_ip.specific_tag = 1; -// interface_ip.tag_w = tag; -// interface_ip.line_sz = -// int(ceil(data / 8.0)); // int(ceil(pow(2.0,ceil(log2(data)))/8.0)); -// interface_ip.cache_sz = -// XML->sys.core[ithCore].icache.buffer_sizes[0] * interface_ip.line_sz; -// interface_ip.assoc = 0; -// interface_ip.nbanks = 1; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[4] / -// clockRate; // means cycle time -// interface_ip.latency = debug -// ? 
1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[5] / -// clockRate; // means access time -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = -// debug ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports; -// interface_ip.num_rd_ports = 0; -// interface_ip.num_wr_ports = 0; -// interface_ip.num_se_rd_ports = 0; -// interface_ip.num_search_ports = -// XML->sys.core[ithCore].number_instruction_fetch_ports; -// icache.missb = new ArrayST(&interface_ip, -// "icacheMissBuffer", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// icache.area.set_area(icache.area.get_area() + -// icache.missb->local_result.area); -// area.set_area(area.get_area() + icache.missb->local_result.area); -// // output_data_csv(icache.missb.local_result); - -// // fill buffer -// tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; -// data = icache.caches->l_ip.line_sz; -// interface_ip.specific_tag = 1; -// interface_ip.tag_w = tag; -// interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); -// interface_ip.cache_sz = data * XML->sys.core[ithCore].icache.buffer_sizes[1]; -// interface_ip.assoc = 0; -// interface_ip.nbanks = 1; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; -// interface_ip.latency = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = -// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; -// interface_ip.num_rd_ports = 0; -// interface_ip.num_wr_ports = 0; -// interface_ip.num_se_rd_ports = 0; -// interface_ip.num_search_ports = -// XML->sys.core[ithCore].number_instruction_fetch_ports; -// icache.ifb = new ArrayST(&interface_ip, -// "icacheFillBuffer", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// icache.area.set_area(icache.area.get_area() + icache.ifb->local_result.area); -// area.set_area(area.get_area() + icache.ifb->local_result.area); -// // output_data_csv(icache.ifb.local_result); - -// // prefetch buffer -// tag = XML->sys.physical_address_width + -// EXTRA_TAG_BITS; // check with previous entries to decide wthether to -// // merge. -// data = icache.caches->l_ip -// .line_sz; // separate queue to prevent from cache polution. -// interface_ip.specific_tag = 1; -// interface_ip.tag_w = tag; -// interface_ip.line_sz = data; // int(pow(2.0,ceil(log2(data)))); -// interface_ip.cache_sz = -// XML->sys.core[ithCore].icache.buffer_sizes[2] * interface_ip.line_sz; -// interface_ip.assoc = 0; -// interface_ip.nbanks = 1; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[4] / clockRate; -// interface_ip.latency = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].icache.icache_config[5] / clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = -// debug ? 
1 : XML->sys.core[ithCore].number_instruction_fetch_ports; -// interface_ip.num_rd_ports = 0; -// interface_ip.num_wr_ports = 0; -// interface_ip.num_se_rd_ports = 0; -// interface_ip.num_search_ports = -// XML->sys.core[ithCore].number_instruction_fetch_ports; -// icache.prefetchb = new ArrayST(&interface_ip, -// "icacheprefetchBuffer", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// icache.area.set_area(icache.area.get_area() + -// icache.prefetchb->local_result.area); -// area.set_area(area.get_area() + icache.prefetchb->local_result.area); -// // output_data_csv(icache.prefetchb.local_result); - -// // Instruction buffer -// data = -// XML->sys.core[ithCore].instruction_length * -// XML->sys.core[ithCore] -// .peak_issue_width; // icache.caches.l_ip.line_sz; //multiple -// // threads timing sharing the instruction buffer. -// interface_ip.is_cache = false; -// interface_ip.pure_ram = true; -// interface_ip.pure_cam = false; -// interface_ip.line_sz = int(ceil(data / 8.0)); -// interface_ip.cache_sz = -// XML->sys.core[ithCore].number_hardware_threads * -// XML->sys.core[ithCore].instruction_buffer_size * -// interface_ip.line_sz > -// 64 -// ? XML->sys.core[ithCore].number_hardware_threads * -// XML->sys.core[ithCore].instruction_buffer_size * -// interface_ip.line_sz -// : 64; -// interface_ip.assoc = 1; -// interface_ip.nbanks = 1; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = 1.0 / clockRate; -// interface_ip.latency = 1.0 / clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// // NOTE: Assuming IB is time slice shared among threads, every fetch op will -// // at least fetch "fetch width" instructions. -// interface_ip.num_rw_ports = -// debug -// ? 
1 -// : XML->sys.core[ithCore] -// .number_instruction_fetch_ports; // XML->sys.core[ithCore].fetch_width; -// interface_ip.num_rd_ports = 0; -// interface_ip.num_wr_ports = 0; -// interface_ip.num_se_rd_ports = 0; -// IB = new ArrayST(&interface_ip, -// "InstBuffer", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// IB->area.set_area(IB->area.get_area() + IB->local_result.area); -// area.set_area(area.get_area() + IB->local_result.area); -// // output_data_csv(IB.IB.local_result); - -// // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; -// // inst_decoder.init_decoder(is_default, &interface_ip); -// // inst_decoder.full_decoder_power(); - -// if (coredynp.predictionW > 0) { -// /* -// * BTB branch target buffer, accessed during IF stage. Virtually indexed and -// * virtually tagged It is only a cache without all the buffers in the cache -// * controller since it is more like a look up table than a cache with cache -// * controller. When access miss, no load from other places such as main -// * memory (not actively fill the misses), it is passively updated under two -// * circumstances: 1) when BPT@ID stage finds out current is a taken branch -// * while BTB missed 2) When BPT@ID stage predicts differently than BTB 3) -// * When ID stage finds out current instruction is not a branch while BTB had -// * a hit.(mark as invalid) 4) when EXEU find out wrong target has been -// * provided from BTB. -// * -// */ -// size = XML->sys.core[ithCore].BTB.BTB_config[0]; -// line = XML->sys.core[ithCore].BTB.BTB_config[1]; -// assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; -// banks = XML->sys.core[ithCore].BTB.BTB_config[3]; -// idx = debug ? 9 : int(ceil(log2(size / line / assoc))); -// // tag = -// // debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + -// // int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) -// // +EXTRA_TAG_BITS; -// tag = debug ? 
51 -// : XML->sys.virtual_address_width + -// int(ceil(log2( -// XML->sys.core[ithCore].number_hardware_threads))) + -// EXTRA_TAG_BITS; -// interface_ip.is_cache = true; -// interface_ip.pure_ram = false; -// interface_ip.pure_cam = false; -// interface_ip.specific_tag = 1; -// interface_ip.tag_w = tag; -// interface_ip.cache_sz = debug ? 32768 : size; -// interface_ip.line_sz = debug ? 64 : line; -// interface_ip.assoc = debug ? 8 : assoc; -// interface_ip.nbanks = debug ? 1 : banks; -// interface_ip.out_w = interface_ip.line_sz * 8; -// interface_ip.access_mode = -// 0; // debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; -// interface_ip.throughput = -// debug ? 1.0 / clockRate -// : XML->sys.core[ithCore].BTB.BTB_config[4] / clockRate; -// interface_ip.latency = -// debug ? 3.0 / clockRate -// : XML->sys.core[ithCore].BTB.BTB_config[5] / clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = 1; -// interface_ip.num_rd_ports = coredynp.predictionW; -// interface_ip.num_wr_ports = coredynp.predictionW; -// interface_ip.num_se_rd_ports = 0; -// BTB = new ArrayST(&interface_ip, -// "Branch Target Buffer", -// Core_device, -// coredynp.opt_local, -// coredynp.core_ty); -// BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); -// area.set_area(area.get_area() + BTB->local_result.area); -// /// cout<<"area="<set_params(XML, ithCore, &interface_ip, coredynp); -// BPT->computeArea(); -// BPT->set_stats(XML); -// area.set_area(area.get_area() + BPT->area.get_area()); -// } - -// ID_inst.set_params(is_default, -// &interface_ip, -// coredynp.opcode_length, -// 1 /*Decoder should not know how many by itself*/, -// coredynp.x86, -// Core_device, -// coredynp.core_ty); - -// ID_operand.set_params(is_default, -// &interface_ip, -// coredynp.arch_ireg_width, -// 1, -// coredynp.x86, -// Core_device, -// 
coredynp.core_ty); - -// ID_misc.set_params(is_default, -// &interface_ip, -// 8 /* Prefix field etc upto 14B*/, -// 1, -// coredynp.x86, -// Core_device, -// coredynp.core_ty); -// ID_inst.computeArea(); -// ID_inst.computeDynamicPower(); -// ID_operand.computeArea(); -// ID_operand.computeDynamicPower(); -// ID_misc.computeArea(); -// ID_misc.computeDynamicPower(); -// // TODO: X86 decoder should decode the inst in cyclic mode under the control -// // of squencer. So the dynamic power should be multiplied by a few times. -// area.set_area(area.get_area() + -// (ID_inst.area.get_area() + ID_operand.area.get_area() + -// ID_misc.area.get_area()) * -// coredynp.decodeW); -// } - -// void InstFetchU::computeEnergy(bool is_tdp) { -// if (!exist) -// return; -// if (is_tdp) { -// // init stats for Peak -// icache.caches->stats_t.readAc.access = -// icache.caches->l_ip.num_rw_ports * coredynp.IFU_duty_cycle; -// icache.caches->stats_t.readAc.miss = 0; -// icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - -// icache.caches->stats_t.readAc.miss; -// icache.caches->tdp_stats = icache.caches->stats_t; - -// icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit = -// icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit = -// icache.missb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// icache.missb->tdp_stats = icache.missb->stats_t; - -// icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit = -// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = -// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// icache.ifb->tdp_stats = icache.ifb->stats_t; - -// icache.prefetchb->stats_t.readAc.access = -// icache.prefetchb->stats_t.readAc.hit = -// icache.prefetchb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// 
icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit = -// icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; -// icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - -// IB->stats_t.readAc.access = IB->stats_t.writeAc.access = -// XML->sys.core[ithCore].peak_issue_width; -// IB->tdp_stats = IB->stats_t; - -// if (coredynp.predictionW > 0) { -// BTB->stats_t.readAc.access = -// coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; -// BTB->stats_t.writeAc.access = -// 0; // XML->sys.core[ithCore].BTB.write_accesses; -// } - -// ID_inst.stats_t.readAc.access = coredynp.decodeW; -// ID_operand.stats_t.readAc.access = coredynp.decodeW; -// ID_misc.stats_t.readAc.access = coredynp.decodeW; -// ID_inst.tdp_stats = ID_inst.stats_t; -// ID_operand.tdp_stats = ID_operand.stats_t; -// ID_misc.tdp_stats = ID_misc.stats_t; - -// } else { -// // init stats for Runtime Dynamic (RTP) -// icache.caches->stats_t.readAc.access = -// XML->sys.core[ithCore].icache.read_accesses; -// icache.caches->stats_t.readAc.miss = -// XML->sys.core[ithCore].icache.read_misses; -// icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - -// icache.caches->stats_t.readAc.miss; -// icache.caches->rtp_stats = icache.caches->stats_t; - -// icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; -// icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; -// icache.missb->rtp_stats = icache.missb->stats_t; - -// icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; -// icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; -// icache.ifb->rtp_stats = icache.ifb->stats_t; - -// icache.prefetchb->stats_t.readAc.access = -// icache.caches->stats_t.readAc.miss; -// icache.prefetchb->stats_t.writeAc.access = -// icache.caches->stats_t.readAc.miss; -// icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; - -// IB->stats_t.readAc.access = 
IB->stats_t.writeAc.access = -// XML->sys.core[ithCore].total_instructions; -// IB->rtp_stats = IB->stats_t; - -// if (coredynp.predictionW > 0) { -// BTB->stats_t.readAc.access = -// XML->sys.core[ithCore] -// .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; -// BTB->stats_t.writeAc.access = -// XML->sys.core[ithCore] -// .BTB -// .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; -// BTB->rtp_stats = BTB->stats_t; -// } - -// ID_inst.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; -// ID_operand.stats_t.readAc.access = -// XML->sys.core[ithCore].total_instructions; -// ID_misc.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; -// ID_inst.rtp_stats = ID_inst.stats_t; -// ID_operand.rtp_stats = ID_operand.stats_t; -// ID_misc.rtp_stats = ID_misc.stats_t; -// } - -// icache.power_t.reset(); -// IB->power_t.reset(); -// // ID_inst.power_t.reset(); -// // ID_operand.power_t.reset(); -// // ID_misc.power_t.reset(); -// if (coredynp.predictionW > 0) { -// BTB->power_t.reset(); -// } - -// icache.power_t.readOp.dynamic += -// (icache.caches->stats_t.readAc.hit * -// icache.caches->local_result.power.readOp.dynamic + -// // icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ -// icache.caches->stats_t.readAc.miss * -// icache.caches->local_result.power.readOp -// .dynamic + // assume tag data accessed in parallel -// icache.caches->stats_t.readAc.miss * -// icache.caches->local_result.power.writeOp -// .dynamic); // read miss in Icache cause a write to Icache -// icache.power_t.readOp.dynamic += -// icache.missb->stats_t.readAc.access * -// icache.missb->local_result.power.searchOp.dynamic + -// icache.missb->stats_t.writeAc.access * -// icache.missb->local_result.power.writeOp -// .dynamic; // each access to missb involves a CAM and a write -// icache.power_t.readOp.dynamic += -// icache.ifb->stats_t.readAc.access * -// 
icache.ifb->local_result.power.searchOp.dynamic + -// icache.ifb->stats_t.writeAc.access * -// icache.ifb->local_result.power.writeOp.dynamic; -// icache.power_t.readOp.dynamic += -// icache.prefetchb->stats_t.readAc.access * -// icache.prefetchb->local_result.power.searchOp.dynamic + -// icache.prefetchb->stats_t.writeAc.access * -// icache.prefetchb->local_result.power.writeOp.dynamic; - -// IB->power_t.readOp.dynamic += -// IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + -// IB->stats_t.writeAc.access * IB->local_result.power.writeOp.dynamic; - -// if (coredynp.predictionW > 0) { -// BTB->power_t.readOp.dynamic += -// BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + -// BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; - -// BPT->computeDynamicPower(is_tdp); -// } - -// if (is_tdp) { -// // icache.power = icache.power_t + -// // (icache.caches->local_result.power)*pppm_lkg + -// // (icache.missb->local_result.power + -// // icache.ifb->local_result.power + -// // icache.prefetchb->local_result.power)*pppm_Isub; -// icache.power = icache.power_t + (icache.caches->local_result.power + -// icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power) * -// pppm_lkg; - -// IB->power = IB->power_t + IB->local_result.power * pppm_lkg; -// power = power + icache.power + IB->power; -// if (coredynp.predictionW > 0) { -// BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; -// power = power + BTB->power + BPT->power; -// } - -// ID_inst.power_t.readOp.dynamic = ID_inst.power.readOp.dynamic; -// ID_operand.power_t.readOp.dynamic = ID_operand.power.readOp.dynamic; -// ID_misc.power_t.readOp.dynamic = ID_misc.power.readOp.dynamic; - -// ID_inst.power.readOp.dynamic *= ID_inst.tdp_stats.readAc.access; -// ID_operand.power.readOp.dynamic *= ID_operand.tdp_stats.readAc.access; -// ID_misc.power.readOp.dynamic *= ID_misc.tdp_stats.readAc.access; - -// 
power = power + (ID_inst.power + ID_operand.power + ID_misc.power); -// } else { -// // icache.rt_power = icache.power_t + -// // (icache.caches->local_result.power)*pppm_lkg + -// // (icache.missb->local_result.power + -// // icache.ifb->local_result.power + -// // icache.prefetchb->local_result.power)*pppm_Isub; - -// icache.rt_power = icache.power_t + (icache.caches->local_result.power + -// icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power) * -// pppm_lkg; - -// IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; -// rt_power = rt_power + icache.rt_power + IB->rt_power; -// if (coredynp.predictionW > 0) { -// BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; -// rt_power = rt_power + BTB->rt_power + BPT->rt_power; -// } - -// ID_inst.rt_power.readOp.dynamic = -// ID_inst.power_t.readOp.dynamic * ID_inst.rtp_stats.readAc.access; -// ID_operand.rt_power.readOp.dynamic = ID_operand.power_t.readOp.dynamic * -// ID_operand.rtp_stats.readAc.access; -// ID_misc.rt_power.readOp.dynamic = -// ID_misc.power_t.readOp.dynamic * ID_misc.rtp_stats.readAc.access; - -// rt_power = rt_power + -// (ID_inst.rt_power + ID_operand.rt_power + ID_misc.rt_power); -// } -// } - -// void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { -// if (!exist) -// return; -// string indent_str(indent, ' '); -// string indent_str_next(indent + 2, ' '); -// bool long_channel = XML->sys.longer_channel_device; -// bool power_gating = XML->sys.power_gating; - -// if (is_tdp) { - -// cout << indent_str << "Instruction Cache:" << endl; -// cout << indent_str_next << "Area = " << icache.area.get_area() * 1e-6 -// << " mm^2" << endl; -// cout << indent_str_next -// << "Peak Dynamic = " << icache.power.readOp.dynamic * clockRate << " W" -// << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel ? 
icache.power.readOp.longer_channel_leakage -// : icache.power.readOp.leakage) -// << " W" << endl; -// if (power_gating) -// cout << indent_str_next << "Subthreshold Leakage with power gating = " -// << (long_channel -// ? icache.power.readOp.power_gated_with_long_channel_leakage -// : icache.power.readOp.power_gated_leakage) -// << " W" << endl; -// cout << indent_str_next -// << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" -// << endl; -// cout << indent_str_next << "Runtime Dynamic = " -// << icache.rt_power.readOp.dynamic / executionTime << " W" << endl; -// cout << endl; -// if (coredynp.predictionW > 0) { -// cout << indent_str << "Branch Target Buffer:" << endl; -// cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 -// << " mm^2" << endl; -// cout << indent_str_next -// << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" -// << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel ? BTB->power.readOp.longer_channel_leakage -// : BTB->power.readOp.leakage) -// << " W" << endl; -// if (power_gating) -// cout << indent_str_next << "Subthreshold Leakage with power gating = " -// << (long_channel -// ? BTB->power.readOp.power_gated_with_long_channel_leakage -// : BTB->power.readOp.power_gated_leakage) -// << " W" << endl; -// cout << indent_str_next -// << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" -// << endl; -// cout << indent_str_next << "Runtime Dynamic = " -// << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; -// cout << endl; -// if (BPT->exist) { -// cout << indent_str << "Branch Predictor:" << endl; -// cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 -// << " mm^2" << endl; -// cout << indent_str_next -// << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate -// << " W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel ? 
BPT->power.readOp.longer_channel_leakage -// : BPT->power.readOp.leakage) -// << " W" << endl; -// if (power_gating) -// cout << indent_str_next << "Subthreshold Leakage with power gating = " -// << (long_channel -// ? BPT->power.readOp.power_gated_with_long_channel_leakage -// : BPT->power.readOp.power_gated_leakage) -// << " W" << endl; -// cout << indent_str_next -// << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" -// << endl; -// cout << indent_str_next << "Runtime Dynamic = " -// << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; -// cout << endl; -// if (plevel > 3) { -// BPT->displayEnergy(indent + 4, plevel, is_tdp); -// } -// } -// } -// cout << indent_str << "Instruction Buffer:" << endl; -// cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 -// << " mm^2" << endl; -// cout << indent_str_next -// << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" -// << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel ? IB->power.readOp.longer_channel_leakage -// : IB->power.readOp.leakage) -// << " W" << endl; -// if (power_gating) -// cout << indent_str_next << "Subthreshold Leakage with power gating = " -// << (long_channel -// ? 
IB->power.readOp.power_gated_with_long_channel_leakage -// : IB->power.readOp.power_gated_leakage) -// << " W" << endl; -// cout << indent_str_next -// << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next -// << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime -// << " W" << endl; -// cout << endl; -// cout << indent_str << "Instruction Decoder:" << endl; -// cout << indent_str_next << "Area = " -// << (ID_inst.area.get_area() + ID_operand.area.get_area() + -// ID_misc.area.get_area()) * -// coredynp.decodeW * 1e-6 -// << " mm^2" << endl; -// cout << indent_str_next << "Peak Dynamic = " -// << (ID_inst.power.readOp.dynamic + ID_operand.power.readOp.dynamic + -// ID_misc.power.readOp.dynamic) * -// clockRate -// << " W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel ? (ID_inst.power.readOp.longer_channel_leakage + -// ID_operand.power.readOp.longer_channel_leakage + -// ID_misc.power.readOp.longer_channel_leakage) -// : (ID_inst.power.readOp.leakage + -// ID_operand.power.readOp.leakage + -// ID_misc.power.readOp.leakage)) -// << " W" << endl; - -// double tot_leakage = -// (ID_inst.power.readOp.leakage + ID_operand.power.readOp.leakage + -// ID_misc.power.readOp.leakage); -// double tot_leakage_longchannel = -// (ID_inst.power.readOp.longer_channel_leakage + -// ID_operand.power.readOp.longer_channel_leakage + -// ID_misc.power.readOp.longer_channel_leakage); -// double tot_leakage_pg = (ID_inst.power.readOp.power_gated_leakage + -// ID_operand.power.readOp.power_gated_leakage + -// ID_misc.power.readOp.power_gated_leakage); -// double tot_leakage_pg_with_long_channel = -// (ID_inst.power.readOp.power_gated_with_long_channel_leakage + -// ID_operand.power.readOp.power_gated_with_long_channel_leakage + -// ID_misc.power.readOp.power_gated_with_long_channel_leakage); - -// if (power_gating) -// cout << indent_str_next << "Subthreshold Leakage with power gating = " 
-// << (long_channel ? tot_leakage_pg_with_long_channel : tot_leakage_pg) -// << " W" << endl; -// cout << indent_str_next << "Gate Leakage = " -// << (ID_inst.power.readOp.gate_leakage + -// ID_operand.power.readOp.gate_leakage + -// ID_misc.power.readOp.gate_leakage) -// << " W" << endl; -// cout << indent_str_next << "Runtime Dynamic = " -// << (ID_inst.rt_power.readOp.dynamic + -// ID_operand.rt_power.readOp.dynamic + -// ID_misc.rt_power.readOp.dynamic) / -// executionTime -// << " W" << endl; -// cout << endl; -// } else { -// // cout << indent_str_next << "Instruction Cache Peak Dynamic = " -// //<< icache.rt_power.readOp.dynamic*clockRate << " W" << endl; -// // cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " -// // << icache.rt_power.readOp.leakage <<" W" << endl; cout << -// // indent_str_next << "Instruction Cache Gate Leakage = " << -// // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << -// // indent_str_next << "Instruction Buffer Peak Dynamic = " << -// // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << -// // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << -// // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next -// // << "Instruction Buffer Gate Leakage = " << -// // IB->rt_power.readOp.gate_leakage -// //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer -// // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << -// // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold -// // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout -// // << indent_str_next << "Branch Target Buffer Gate Leakage = " << -// // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << -// // indent_str_next << "Branch Predictor Peak Dynamic = " << -// // BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout -// // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << -// // 
BPT->rt_power.readOp.leakage << " W" << endl; cout << -// // indent_str_next -// // << "Branch Predictor Gate Leakage = " << -// // BPT->rt_power.readOp.gate_leakage -// //<< " W" << endl; -// } -// } - -// InstFetchU ::~InstFetchU() { - -// if (!exist) -// return; -// if (IB) { -// delete IB; -// IB = 0; -// } -// if (coredynp.predictionW > 0) { -// if (BTB) { -// delete BTB; -// BTB = 0; -// } -// if (BPT) { -// delete BPT; -// BPT = 0; -// } -// } -// } - - /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT @@ -866,7 +48,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), IB(0), BTB(0), + coredynp(dyn_p_), exist(exist_) { if (!exist) return; @@ -1102,13 +284,14 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, + IB.set_params(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - IB->area.set_area(IB->area.get_area() + IB->local_result.area); - area.set_area(area.get_area() + IB->local_result.area); + IB.computeArea(); + IB.area.set_area(IB.area.get_area() + IB.local_result.area); + area.set_area(area.get_area() + IB.local_result.area); // output_data_csv(IB.IB.local_result); // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; @@ -1169,20 +352,20 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_rd_ports = coredynp.predictionW; interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; - BTB = new ArrayST(&interface_ip, + BTB.set_params(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area() + BTB->local_result.area); - area.set_area(area.get_area() + 
BTB->local_result.area); + BTB.computeArea(); + BTB.area.set_area(BTB.area.get_area() + BTB.local_result.area); + area.set_area(area.get_area() + BTB.local_result.area); /// cout<<"area="<set_params(XML, ithCore, &interface_ip, coredynp); - BPT->computeArea(); - BPT->set_stats(XML); - area.set_area(area.get_area() + BPT->area.get_area()); + BPT.set_params(XML, ithCore, &interface_ip, coredynp); + BPT.computeArea(); + BPT.set_stats(XML); + area.set_area(area.get_area() + BPT.area.get_area()); } ID_inst.set_params(is_default, @@ -1255,14 +438,14 @@ void InstFetchU::computeEnergy(bool is_tdp) { icache.ifb->l_ip.num_search_ports * coredynp.IFU_duty_cycle; icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + IB.stats_t.readAc.access = IB.stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; + IB.tdp_stats = IB.stats_t; if (coredynp.predictionW > 0) { - BTB->stats_t.readAc.access = + BTB.stats_t.readAc.access = coredynp.predictionW; // XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = + BTB.stats_t.writeAc.access = 0; // XML->sys.core[ithCore].BTB.write_accesses; } @@ -1297,19 +480,19 @@ void InstFetchU::computeEnergy(bool is_tdp) { icache.caches->stats_t.readAc.miss; icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = + IB.stats_t.readAc.access = IB.stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; + IB.rtp_stats = IB.stats_t; if (coredynp.predictionW > 0) { - BTB->stats_t.readAc.access = + BTB.stats_t.readAc.access = XML->sys.core[ithCore] .BTB.read_accesses; // XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = + BTB.stats_t.writeAc.access = XML->sys.core[ithCore] .BTB .write_accesses; // XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; + BTB.rtp_stats = BTB.stats_t; } 
ID_inst.stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; @@ -1322,12 +505,12 @@ void InstFetchU::computeEnergy(bool is_tdp) { } icache.power_t.reset(); - IB->power_t.reset(); + IB.power_t.reset(); // ID_inst.power_t.reset(); // ID_operand.power_t.reset(); // ID_misc.power_t.reset(); if (coredynp.predictionW > 0) { - BTB->power_t.reset(); + BTB.power_t.reset(); } icache.power_t.readOp.dynamic += @@ -1357,16 +540,16 @@ void InstFetchU::computeEnergy(bool is_tdp) { icache.prefetchb->stats_t.writeAc.access * icache.prefetchb->local_result.power.writeOp.dynamic; - IB->power_t.readOp.dynamic += - IB->local_result.power.readOp.dynamic * IB->stats_t.readAc.access + - IB->stats_t.writeAc.access * IB->local_result.power.writeOp.dynamic; + IB.power_t.readOp.dynamic += + IB.local_result.power.readOp.dynamic * IB.stats_t.readAc.access + + IB.stats_t.writeAc.access * IB.local_result.power.writeOp.dynamic; if (coredynp.predictionW > 0) { - BTB->power_t.readOp.dynamic += - BTB->local_result.power.readOp.dynamic * BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access * BTB->local_result.power.writeOp.dynamic; + BTB.power_t.readOp.dynamic += + BTB.local_result.power.readOp.dynamic * BTB.stats_t.readAc.access + + BTB.stats_t.writeAc.access * BTB.local_result.power.writeOp.dynamic; - BPT->computeDynamicPower(is_tdp); + BPT.computeDynamicPower(is_tdp); } if (is_tdp) { @@ -1381,11 +564,11 @@ void InstFetchU::computeEnergy(bool is_tdp) { icache.prefetchb->local_result.power) * pppm_lkg; - IB->power = IB->power_t + IB->local_result.power * pppm_lkg; - power = power + icache.power + IB->power; + IB.power = IB.power_t + IB.local_result.power * pppm_lkg; + power = power + icache.power + IB.power; if (coredynp.predictionW > 0) { - BTB->power = BTB->power_t + BTB->local_result.power * pppm_lkg; - power = power + BTB->power + BPT->power; + BTB.power = BTB.power_t + BTB.local_result.power * pppm_lkg; + power = power + BTB.power + BPT.power; } ID_inst.power_t.readOp.dynamic 
= ID_inst.power.readOp.dynamic; @@ -1410,11 +593,11 @@ void InstFetchU::computeEnergy(bool is_tdp) { icache.prefetchb->local_result.power) * pppm_lkg; - IB->rt_power = IB->power_t + IB->local_result.power * pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; + IB.rt_power = IB.power_t + IB.local_result.power * pppm_lkg; + rt_power = rt_power + icache.rt_power + IB.rt_power; if (coredynp.predictionW > 0) { - BTB->rt_power = BTB->power_t + BTB->local_result.power * pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; + BTB.rt_power = BTB.power_t + BTB.local_result.power * pppm_lkg; + rt_power = rt_power + BTB.rt_power + BPT.rt_power; } ID_inst.rt_power.readOp.dynamic = @@ -1463,75 +646,75 @@ void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; if (coredynp.predictionW > 0) { cout << indent_str << "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << BTB.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << BTB->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << BTB.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? BTB->power.readOp.longer_channel_leakage - : BTB->power.readOp.leakage) + << (long_channel ? BTB.power.readOp.longer_channel_leakage + : BTB.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? BTB->power.readOp.power_gated_with_long_channel_leakage - : BTB->power.readOp.power_gated_leakage) + ? 
BTB.power.readOp.power_gated_with_long_channel_leakage + : BTB.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << BTB.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << BTB->rt_power.readOp.dynamic / executionTime << " W" << endl; + << BTB.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; - if (BPT->exist) { + if (BPT.exist) { cout << indent_str << "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << BPT.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << BPT->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << BPT.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? BPT->power.readOp.longer_channel_leakage - : BPT->power.readOp.leakage) + << (long_channel ? BPT.power.readOp.longer_channel_leakage + : BPT.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? BPT->power.readOp.power_gated_with_long_channel_leakage - : BPT->power.readOp.power_gated_leakage) + ? 
BPT.power.readOp.power_gated_with_long_channel_leakage + : BPT.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << BPT.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << BPT->rt_power.readOp.dynamic / executionTime << " W" << endl; + << BPT.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 3) { - BPT->displayEnergy(indent + 4, plevel, is_tdp); + BPT.displayEnergy(indent + 4, plevel, is_tdp); } } } cout << indent_str << "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << IB.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << IB->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << IB.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? IB->power.readOp.longer_channel_leakage - : IB->power.readOp.leakage) + << (long_channel ? IB.power.readOp.longer_channel_leakage + : IB.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? IB->power.readOp.power_gated_with_long_channel_leakage - : IB->power.readOp.power_gated_leakage) + ? 
IB.power.readOp.power_gated_with_long_channel_leakage + : IB.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; + << "Gate Leakage = " << IB.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next - << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic / executionTime + << "Runtime Dynamic = " << IB.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; cout << indent_str << "Instruction Decoder:" << endl; @@ -1593,24 +776,24 @@ void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // indent_str_next << "Instruction Cache Gate Leakage = " << // icache.rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next << "Instruction Buffer Peak Dynamic = " << - // IB->rt_power.readOp.dynamic*clockRate << " W" << endl; cout << + // IB.rt_power.readOp.dynamic*clockRate << " W" << endl; cout << // indent_str_next << "Instruction Buffer Subthreshold Leakage = " << - // IB->rt_power.readOp.leakage << " W" << endl; cout << indent_str_next + // IB.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next // << "Instruction Buffer Gate Leakage = " << - // IB->rt_power.readOp.gate_leakage + // IB.rt_power.readOp.gate_leakage //<< " W" << endl; cout << indent_str_next << "Branch Target Buffer - // Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << + // Peak Dynamic = " << BTB.rt_power.readOp.dynamic*clockRate << " W" << // endl; cout << indent_str_next << "Branch Target Buffer Subthreshold - // Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl; cout + // Leakage = " << BTB.rt_power.readOp.leakage << " W" << endl; cout // << indent_str_next << "Branch Target Buffer Gate Leakage = " << - // BTB->rt_power.readOp.gate_leakage << " W" << endl; cout << + // BTB.rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next << "Branch Predictor Peak Dynamic = " << - // 
BPT->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // BPT.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Branch Predictor Subthreshold Leakage = " << - // BPT->rt_power.readOp.leakage << " W" << endl; cout << + // BPT.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next // << "Branch Predictor Gate Leakage = " << - // BPT->rt_power.readOp.gate_leakage + // BPT.rt_power.readOp.gate_leakage //<< " W" << endl; } } @@ -1619,19 +802,5 @@ InstFetchU ::~InstFetchU() { if (!exist) return; - if (IB) { - delete IB; - IB = 0; - } - if (coredynp.predictionW > 0) { - if (BTB) { - delete BTB; - BTB = 0; - } - if (BPT) { - delete BPT; - BPT = 0; - } - } } \ No newline at end of file diff --git a/src/core/instfetch.h b/src/core/instfetch.h index e805c91..bb9b535 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -1,79 +1,3 @@ -// /***************************************************************************** -// * McPAT -// * SOFTWARE LICENSE AGREEMENT -// * Copyright 2012 Hewlett-Packard Development Company, L.P. -// * All Rights Reserved -// * -// * Redistribution and use in source and binary forms, with or without -// * modification, are permitted provided that the following conditions are -// * met: redistributions of source code must retain the above copyright -// * notice, this list of conditions and the following disclaimer; -// * redistributions in binary form must reproduce the above copyright -// * notice, this list of conditions and the following disclaimer in the -// * documentation and/or other materials provided with the distribution; -// * neither the name of the copyright holders nor the names of its -// * contributors may be used to endorse or promote products derived from -// * this software without specific prior written permission. 
- -// * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” -// * -// ***************************************************************************/ - -// #ifndef __INST_FETCH_U_H__ -// #define __INST_FETCH_U_H__ - -// #include "XML_Parse.h" -// #include "array.h" -// #include "basic_components.h" -// #include "branch_predictor.h" -// #include "inst_decoder.h" -// #include "instcache.h" -// #include "interconnect.h" -// #include "parameter.h" - -// class InstFetchU : public Component { -// public: -// const ParseXML *XML; -// int ithCore; -// InputParameter interface_ip; -// CoreDynParam coredynp; -// double clockRate; -// double executionTime; -// double scktRatio; -// double chip_PR_overhead; -// double macro_PR_overhead; -// enum Cache_policy cache_p; -// InstCache icache; -// ArrayST *IB; -// ArrayST *BTB; -// BranchPredictor *BPT; -// inst_decoder ID_inst; -// inst_decoder ID_operand; -// inst_decoder ID_misc; -// bool exist; - -// InstFetchU(const ParseXML *XML_interface, -// int ithCore_, -// InputParameter *interface_ip_, -// const CoreDynParam &dyn_p_, -// bool exsit = true); -// void computeEnergy(bool is_tdp = true); -// void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); -// 
~InstFetchU(); -// }; - -// #endif // __INST_FETCH_U_H__ - /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT @@ -129,9 +53,9 @@ class InstFetchU : public Component { double macro_PR_overhead; enum Cache_policy cache_p; InstCache icache; - ArrayST *IB; - ArrayST *BTB; - BranchPredictor *BPT; + ArrayST IB; + ArrayST BTB; + BranchPredictor BPT; inst_decoder ID_inst; inst_decoder ID_operand; inst_decoder ID_misc; diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index bd4a7dc..a1edce6 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -121,10 +121,16 @@ void inst_decoder::set_params(bool _is_default, predec_blk_drv2.set_params(0, &predec_blk2, false); pre_dec.set_params(&predec_blk_drv1, &predec_blk_drv2); - + init_params = true; } void inst_decoder::computeArea(){ + if (!init_params) { + std::cerr << "[ Inst_decoder ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } double area_decoder = final_dec.area.get_area() * num_decoded_signals * num_decoder_segments * num_decoders; // double w_decoder = area_decoder / area.get_h(); diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index 2d4b7c3..282d5d4 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -27,7 +27,7 @@ class inst_decoder : public Component { bool x86_, enum Device_ty device_ty_ = Core_device, enum Core_type core_ty_ = Inorder); - inst_decoder(){}; + inst_decoder(){init_params = false;}; bool is_default; int opcode_length; int num_decoders; @@ -56,6 +56,8 @@ class inst_decoder : public Component { void inst_decoder_delay_power(); ~inst_decoder(); void leakage_feedback(double temperature); + private: + bool init_params; }; #endif //__INST_DECODER_H__ \ No newline at end of file From 038fc20ac1d7b91e87ffcec509fd27e30a3610bc Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 14:55:52 -0500 Subject: [PATCH 45/59] Updated to Unit Test 
Scripts and Formatting of Text Data --- unit_test/format.sh | 11 + .../{mp_1.xml => mp_01.xml} | 0 .../{mp_2.xml => mp_02.xml} | 0 .../{mp_3.xml => mp_03.xml} | 0 .../{mp_4.xml => mp_04.xml} | 0 .../{mp_5.xml => mp_05.xml} | 0 .../{mp_6.xml => mp_06.xml} | 0 .../{mp_7.xml => mp_07.xml} | 0 .../{mp_8.xml => mp_08.xml} | 0 .../{mp_9.xml => mp_09.xml} | 0 .../{mp_1.xml => mp_01.xml} | 0 .../mp_02.xml} | 0 .../{mp_3.xml => mp_03.xml} | 0 .../{mp_4.xml => mp_04.xml} | 0 .../{mp_5.xml => mp_05.xml} | 0 .../{mp_6.xml => mp_06.xml} | 0 .../{mp_7.xml => mp_07.xml} | 0 .../{mp_8.xml => mp_08.xml} | 0 .../{mp_9.xml => mp_09.xml} | 0 unit_test/input/serialization_test_2/mp_2.xml | 533 ------------------ .../input/serialization_test_2/mp_51.xml | 533 ------------------ .../{mp_1.xml => mp_01.xml} | 0 .../{mp_2.xml => mp_02.xml} | 0 .../{mp_3.xml => mp_03.xml} | 0 .../{mp_4.xml => mp_04.xml} | 0 .../{mp_5.xml => mp_05.xml} | 0 .../{mp_6.xml => mp_06.xml} | 0 .../{mp_7.xml => mp_07.xml} | 0 .../{mp_8.xml => mp_08.xml} | 0 .../{mp_9.xml => mp_09.xml} | 0 .../input/serialization_test_3/mp_51.xml | 533 ------------------ .../{mp_1.xml => mp_01.xml} | 0 .../{mp_2.xml => mp_02.xml} | 0 .../{mp_3.xml => mp_03.xml} | 0 .../{mp_4.xml => mp_04.xml} | 0 .../{mp_5.xml => mp_05.xml} | 0 .../{mp_6.xml => mp_06.xml} | 0 .../{mp_7.xml => mp_07.xml} | 0 .../{mp_8.xml => mp_08.xml} | 0 .../{mp_9.xml => mp_09.xml} | 0 .../input/serialization_test_4/mp_51.xml | 533 ------------------ unit_test/unit_test.py | 56 +- unit_test/unit_test.sh | 57 +- 43 files changed, 88 insertions(+), 2168 deletions(-) create mode 100755 unit_test/format.sh rename unit_test/input/serialization_test_1/{mp_1.xml => mp_01.xml} (100%) rename unit_test/input/serialization_test_1/{mp_2.xml => mp_02.xml} (100%) rename unit_test/input/serialization_test_1/{mp_3.xml => mp_03.xml} (100%) rename unit_test/input/serialization_test_1/{mp_4.xml => mp_04.xml} (100%) rename unit_test/input/serialization_test_1/{mp_5.xml => mp_05.xml} 
(100%) rename unit_test/input/serialization_test_1/{mp_6.xml => mp_06.xml} (100%) rename unit_test/input/serialization_test_1/{mp_7.xml => mp_07.xml} (100%) rename unit_test/input/serialization_test_1/{mp_8.xml => mp_08.xml} (100%) rename unit_test/input/serialization_test_1/{mp_9.xml => mp_09.xml} (100%) rename unit_test/input/serialization_test_2/{mp_1.xml => mp_01.xml} (100%) rename unit_test/input/{serialization_test_1/mp_51.xml => serialization_test_2/mp_02.xml} (100%) rename unit_test/input/serialization_test_2/{mp_3.xml => mp_03.xml} (100%) rename unit_test/input/serialization_test_2/{mp_4.xml => mp_04.xml} (100%) rename unit_test/input/serialization_test_2/{mp_5.xml => mp_05.xml} (100%) rename unit_test/input/serialization_test_2/{mp_6.xml => mp_06.xml} (100%) rename unit_test/input/serialization_test_2/{mp_7.xml => mp_07.xml} (100%) rename unit_test/input/serialization_test_2/{mp_8.xml => mp_08.xml} (100%) rename unit_test/input/serialization_test_2/{mp_9.xml => mp_09.xml} (100%) delete mode 100644 unit_test/input/serialization_test_2/mp_2.xml delete mode 100644 unit_test/input/serialization_test_2/mp_51.xml rename unit_test/input/serialization_test_3/{mp_1.xml => mp_01.xml} (100%) rename unit_test/input/serialization_test_3/{mp_2.xml => mp_02.xml} (100%) rename unit_test/input/serialization_test_3/{mp_3.xml => mp_03.xml} (100%) rename unit_test/input/serialization_test_3/{mp_4.xml => mp_04.xml} (100%) rename unit_test/input/serialization_test_3/{mp_5.xml => mp_05.xml} (100%) rename unit_test/input/serialization_test_3/{mp_6.xml => mp_06.xml} (100%) rename unit_test/input/serialization_test_3/{mp_7.xml => mp_07.xml} (100%) rename unit_test/input/serialization_test_3/{mp_8.xml => mp_08.xml} (100%) rename unit_test/input/serialization_test_3/{mp_9.xml => mp_09.xml} (100%) delete mode 100644 unit_test/input/serialization_test_3/mp_51.xml rename unit_test/input/serialization_test_4/{mp_1.xml => mp_01.xml} (100%) rename 
unit_test/input/serialization_test_4/{mp_2.xml => mp_02.xml} (100%) rename unit_test/input/serialization_test_4/{mp_3.xml => mp_03.xml} (100%) rename unit_test/input/serialization_test_4/{mp_4.xml => mp_04.xml} (100%) rename unit_test/input/serialization_test_4/{mp_5.xml => mp_05.xml} (100%) rename unit_test/input/serialization_test_4/{mp_6.xml => mp_06.xml} (100%) rename unit_test/input/serialization_test_4/{mp_7.xml => mp_07.xml} (100%) rename unit_test/input/serialization_test_4/{mp_8.xml => mp_08.xml} (100%) rename unit_test/input/serialization_test_4/{mp_9.xml => mp_09.xml} (100%) delete mode 100644 unit_test/input/serialization_test_4/mp_51.xml diff --git a/unit_test/format.sh b/unit_test/format.sh new file mode 100755 index 0000000..375f128 --- /dev/null +++ b/unit_test/format.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +new_name="" + +for i in {1..50..1}; do + printf -v new_name "mp_%02d.xml" $i + if [ -f "$1/mp_$i.xml" ]; then + echo "mv "$1/mp_$i.xml" $1/$new_name" + mv "$1/mp_$i.xml" $1/$new_name + fi +done diff --git a/unit_test/input/serialization_test_1/mp_1.xml b/unit_test/input/serialization_test_1/mp_01.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_1.xml rename to unit_test/input/serialization_test_1/mp_01.xml diff --git a/unit_test/input/serialization_test_1/mp_2.xml b/unit_test/input/serialization_test_1/mp_02.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_2.xml rename to unit_test/input/serialization_test_1/mp_02.xml diff --git a/unit_test/input/serialization_test_1/mp_3.xml b/unit_test/input/serialization_test_1/mp_03.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_3.xml rename to unit_test/input/serialization_test_1/mp_03.xml diff --git a/unit_test/input/serialization_test_1/mp_4.xml b/unit_test/input/serialization_test_1/mp_04.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_4.xml rename to 
unit_test/input/serialization_test_1/mp_04.xml diff --git a/unit_test/input/serialization_test_1/mp_5.xml b/unit_test/input/serialization_test_1/mp_05.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_5.xml rename to unit_test/input/serialization_test_1/mp_05.xml diff --git a/unit_test/input/serialization_test_1/mp_6.xml b/unit_test/input/serialization_test_1/mp_06.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_6.xml rename to unit_test/input/serialization_test_1/mp_06.xml diff --git a/unit_test/input/serialization_test_1/mp_7.xml b/unit_test/input/serialization_test_1/mp_07.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_7.xml rename to unit_test/input/serialization_test_1/mp_07.xml diff --git a/unit_test/input/serialization_test_1/mp_8.xml b/unit_test/input/serialization_test_1/mp_08.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_8.xml rename to unit_test/input/serialization_test_1/mp_08.xml diff --git a/unit_test/input/serialization_test_1/mp_9.xml b/unit_test/input/serialization_test_1/mp_09.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_9.xml rename to unit_test/input/serialization_test_1/mp_09.xml diff --git a/unit_test/input/serialization_test_2/mp_1.xml b/unit_test/input/serialization_test_2/mp_01.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_1.xml rename to unit_test/input/serialization_test_2/mp_01.xml diff --git a/unit_test/input/serialization_test_1/mp_51.xml b/unit_test/input/serialization_test_2/mp_02.xml similarity index 100% rename from unit_test/input/serialization_test_1/mp_51.xml rename to unit_test/input/serialization_test_2/mp_02.xml diff --git a/unit_test/input/serialization_test_2/mp_3.xml b/unit_test/input/serialization_test_2/mp_03.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_3.xml rename to 
unit_test/input/serialization_test_2/mp_03.xml diff --git a/unit_test/input/serialization_test_2/mp_4.xml b/unit_test/input/serialization_test_2/mp_04.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_4.xml rename to unit_test/input/serialization_test_2/mp_04.xml diff --git a/unit_test/input/serialization_test_2/mp_5.xml b/unit_test/input/serialization_test_2/mp_05.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_5.xml rename to unit_test/input/serialization_test_2/mp_05.xml diff --git a/unit_test/input/serialization_test_2/mp_6.xml b/unit_test/input/serialization_test_2/mp_06.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_6.xml rename to unit_test/input/serialization_test_2/mp_06.xml diff --git a/unit_test/input/serialization_test_2/mp_7.xml b/unit_test/input/serialization_test_2/mp_07.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_7.xml rename to unit_test/input/serialization_test_2/mp_07.xml diff --git a/unit_test/input/serialization_test_2/mp_8.xml b/unit_test/input/serialization_test_2/mp_08.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_8.xml rename to unit_test/input/serialization_test_2/mp_08.xml diff --git a/unit_test/input/serialization_test_2/mp_9.xml b/unit_test/input/serialization_test_2/mp_09.xml similarity index 100% rename from unit_test/input/serialization_test_2/mp_9.xml rename to unit_test/input/serialization_test_2/mp_09.xml diff --git a/unit_test/input/serialization_test_2/mp_2.xml b/unit_test/input/serialization_test_2/mp_2.xml deleted file mode 100644 index c2617b2..0000000 --- a/unit_test/input/serialization_test_2/mp_2.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/unit_test/input/serialization_test_2/mp_51.xml b/unit_test/input/serialization_test_2/mp_51.xml deleted file mode 100644 index e48c198..0000000 --- a/unit_test/input/serialization_test_2/mp_51.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/unit_test/input/serialization_test_3/mp_1.xml b/unit_test/input/serialization_test_3/mp_01.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_1.xml rename to unit_test/input/serialization_test_3/mp_01.xml diff --git a/unit_test/input/serialization_test_3/mp_2.xml b/unit_test/input/serialization_test_3/mp_02.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_2.xml rename to unit_test/input/serialization_test_3/mp_02.xml diff --git a/unit_test/input/serialization_test_3/mp_3.xml b/unit_test/input/serialization_test_3/mp_03.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_3.xml rename to unit_test/input/serialization_test_3/mp_03.xml diff --git a/unit_test/input/serialization_test_3/mp_4.xml b/unit_test/input/serialization_test_3/mp_04.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_4.xml rename to unit_test/input/serialization_test_3/mp_04.xml diff --git a/unit_test/input/serialization_test_3/mp_5.xml b/unit_test/input/serialization_test_3/mp_05.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_5.xml rename to unit_test/input/serialization_test_3/mp_05.xml diff --git a/unit_test/input/serialization_test_3/mp_6.xml b/unit_test/input/serialization_test_3/mp_06.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_6.xml rename to unit_test/input/serialization_test_3/mp_06.xml diff --git a/unit_test/input/serialization_test_3/mp_7.xml b/unit_test/input/serialization_test_3/mp_07.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_7.xml rename to unit_test/input/serialization_test_3/mp_07.xml diff --git a/unit_test/input/serialization_test_3/mp_8.xml b/unit_test/input/serialization_test_3/mp_08.xml similarity index 100% rename from 
unit_test/input/serialization_test_3/mp_8.xml rename to unit_test/input/serialization_test_3/mp_08.xml diff --git a/unit_test/input/serialization_test_3/mp_9.xml b/unit_test/input/serialization_test_3/mp_09.xml similarity index 100% rename from unit_test/input/serialization_test_3/mp_9.xml rename to unit_test/input/serialization_test_3/mp_09.xml diff --git a/unit_test/input/serialization_test_3/mp_51.xml b/unit_test/input/serialization_test_3/mp_51.xml deleted file mode 100644 index ab07584..0000000 --- a/unit_test/input/serialization_test_3/mp_51.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/unit_test/input/serialization_test_4/mp_1.xml b/unit_test/input/serialization_test_4/mp_01.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_1.xml rename to unit_test/input/serialization_test_4/mp_01.xml diff --git a/unit_test/input/serialization_test_4/mp_2.xml 
b/unit_test/input/serialization_test_4/mp_02.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_2.xml rename to unit_test/input/serialization_test_4/mp_02.xml diff --git a/unit_test/input/serialization_test_4/mp_3.xml b/unit_test/input/serialization_test_4/mp_03.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_3.xml rename to unit_test/input/serialization_test_4/mp_03.xml diff --git a/unit_test/input/serialization_test_4/mp_4.xml b/unit_test/input/serialization_test_4/mp_04.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_4.xml rename to unit_test/input/serialization_test_4/mp_04.xml diff --git a/unit_test/input/serialization_test_4/mp_5.xml b/unit_test/input/serialization_test_4/mp_05.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_5.xml rename to unit_test/input/serialization_test_4/mp_05.xml diff --git a/unit_test/input/serialization_test_4/mp_6.xml b/unit_test/input/serialization_test_4/mp_06.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_6.xml rename to unit_test/input/serialization_test_4/mp_06.xml diff --git a/unit_test/input/serialization_test_4/mp_7.xml b/unit_test/input/serialization_test_4/mp_07.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_7.xml rename to unit_test/input/serialization_test_4/mp_07.xml diff --git a/unit_test/input/serialization_test_4/mp_8.xml b/unit_test/input/serialization_test_4/mp_08.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_8.xml rename to unit_test/input/serialization_test_4/mp_08.xml diff --git a/unit_test/input/serialization_test_4/mp_9.xml b/unit_test/input/serialization_test_4/mp_09.xml similarity index 100% rename from unit_test/input/serialization_test_4/mp_9.xml rename to unit_test/input/serialization_test_4/mp_09.xml diff --git a/unit_test/input/serialization_test_4/mp_51.xml 
b/unit_test/input/serialization_test_4/mp_51.xml deleted file mode 100644 index 1858c7f..0000000 --- a/unit_test/input/serialization_test_4/mp_51.xml +++ /dev/null @@ -1,533 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index 6446a77..f7d4aa9 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -43,7 +43,7 @@ parser.add_argument('--input', type=str, default="./input/basic_test_1", help="Test Input Path") parser.add_argument("--output", type=str, default="./output/basic_test_1", help="Test Output Path") parser.add_argument("--golden", type=str, default="./golden/basic_test_1", help="Test Golden Path") -parser.add_argument("--serial", type=bool, default=False, help="Serial if true, Basic if false" +parser.add_argument("--serial", type=bool, default=False, help="Serial if true, Basic if false") args = parser.parse_args() input_path = args.input @@ -91,9 +91,9 @@ def kill(p): def 
diff_result(vector): - outfile = os.path.join("./output", vector + ".out") - difffile = os.path.join("./output", vector + ".diff") - goldfile = os.path.join("./golden", vector + ".golden") + outfile = os.path.join(output_path, vector + ".out") + difffile = os.path.join(output_path, vector + ".diff") + goldfile = os.path.join(golden_path, vector + ".golden") with open(outfile, "r") as o, open(goldfile, "r") as g: outlines = o.readlines() goldlines = g.readlines() @@ -161,8 +161,7 @@ def run_test_serializaiton_create(vector): if kill_flag: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 - if (os.stat(os.path.join(output_path, vector + ".err")).st_size == 0) and - (os.stat(os.path.join(output_path, vector + ".txt")).st_size > 0): + if (os.stat(os.path.join(output_path, vector + ".txt")).st_size > 0): print_pass(vector) return 0 else: @@ -191,15 +190,16 @@ def run_test_serialization_restore(vector, sfile): if kill_flag: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 - else: - if diff_result(vector) == 0: - print_pass(vector) - return 0 - else: - print_fail( - vector, - "The files " + vector + ".out and " + vector + ".golden differ") - return 1 + #else: + # if diff_result(vector) == 0: + # print_pass(vector) + # return 0 + # else: + # print_fail( + # vector, + # "The files " + vector + ".out and " + vector + ".golden differ") + # return 1 + print_pass(vector) return 0 @@ -215,23 +215,25 @@ def get_vectors(): print_info(start) vectors = get_vectors() print_info("Found " + str(len(vectors)) + " test vectors") - for vector in vectors: - if not args.serial: + if not args.serial: + for vector in vectors: if run_test_normal(vector) == 0: p += 1 else: f += 1 else: # Create a Serialized File: - for vector in vectors: - if run_test_serializaiton_create(vector) == 0: - p += 1 + if(len(vectors) > 0): + if run_test_serializaiton_create(vectors[0]) == 0: + # Use Serialized File for Remainder of Tests: + for 
vector in vectors: + if run_test_serialization_restore(vector, vectors[0]) == 0: + p += 1 + else: + f += 1 else: - f += 1 - # Use Serialized File for Remainder of Tests: - for vector in vectors: - if run_test_serialization_restore(vector, vector[0]) == 0: - p += 1 - else: - f += 1 + print_info("Failed to create serialization checkpoint") + else: + print_info("No files in "+input_path) + sys.exit(1) print_results(p, f, len(vectors)) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 8fb1695..71c5bb9 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -26,11 +26,12 @@ print_info () { echo -e "[ $script_name ] $1" } -#-------------------------------------------------------------------- -# -# -# -#-------------------------------------------------------------------- +print_info "#########################################################" +print_info "# #" +print_info "# McPAT Unit Test v2.0.0 #" +print_info "# #" +print_info "#########################################################" + TESTS=("basic_test_1" "serialization_test_1" "serialization_test_2" @@ -50,9 +51,9 @@ OUTPUT="./output" if [ ! 
-d $OUTPUT ]; then print_info "Creating $OUTPUT" mkdir -p $OUTPUT -#else -# print_info "Cleaning $OUTPUT" -# rm -f $OUTPUT/* +else + print_info "Cleaning $OUTPUT" + rm -rf $OUTPUT/* fi for test_set in ${TESTS[@]}; do mkdir -p $OUTPUT/$test_set @@ -67,4 +68,42 @@ done # |_| |_____|____/ |_| |____/ # #-------------------------------------------------------------------- -./unit_test.py +print_info "#########################################################" +print_info "# Unit Test Basic 1 #" +print_info "#########################################################" +./unit_test.py \ + --input=./input/basic_test_1 \ + --output=./output/basic_test_1 \ + --golden=./golden/basic_test_1 +print_info "#########################################################" +print_info "# Unit Test Serialization 1 #" +print_info "#########################################################" +#./unit_test.py \ +# --input=./input/serialization_test_1 \ +# --output=./output/serialization_test_1 \ +# --golden=./golden/serialization_test_1 \ +# --serial=True +print_info "#########################################################" +print_info "# Unit Test Serialization 2 #" +print_info "#########################################################" +#./unit_test.py \ +# --input=./input/serialization_test_2 \ +# --output=./output/serialization_test_2 \ +# --golden=./golden/serialization_test_2 \ +# --serial=True +print_info "#########################################################" +print_info "# Unit Test Serialization 3 #" +print_info "#########################################################" +#./unit_test.py \ +# --input=./input/serialization_test_3 \ +# --output=./output/serialization_test_3 \ +# --golden=./golden/serialization_test_3 \ +# --serial=True +print_info "#########################################################" +print_info "# Unit Test Serialization 4 #" +print_info "#########################################################" +#./unit_test.py \ +# --input=./input/serialization_test_4 \ +# 
--output=./output/serialization_test_4 \ +# --golden=./golden/serialization_test_4 \ +# --serial=True From bc738f91b4c835261a6107220cbecf832f31695b Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 16:09:46 -0500 Subject: [PATCH 46/59] Unit Test in Parallel --- unit_test/format.sh | 18 +- .../golden/serialization_test_1/mp_01.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_02.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_03.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_04.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_05.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_06.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_07.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_08.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_09.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_10.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_11.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_12.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_13.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_14.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_15.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_16.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_17.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_18.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_19.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_20.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_21.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_22.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_23.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_24.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_1/mp_25.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_26.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_27.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_28.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_29.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_30.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_31.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_32.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_33.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_34.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_35.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_36.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_37.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_38.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_39.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_40.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_41.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_42.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_43.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_44.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_45.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_46.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_47.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_48.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_49.golden | 488 ++++++++++++++++++ .../golden/serialization_test_1/mp_50.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_01.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_02.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_2/mp_03.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_04.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_05.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_06.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_07.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_08.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_09.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_10.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_11.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_12.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_13.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_14.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_15.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_16.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_17.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_18.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_19.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_20.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_21.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_22.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_23.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_24.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_25.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_26.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_27.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_28.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_29.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_30.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_2/mp_31.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_32.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_33.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_34.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_35.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_36.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_37.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_38.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_39.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_40.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_41.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_42.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_43.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_44.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_45.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_46.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_47.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_48.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_49.golden | 488 ++++++++++++++++++ .../golden/serialization_test_2/mp_50.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_01.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_02.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_03.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_04.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_05.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_06.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_07.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_08.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_3/mp_09.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_10.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_11.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_12.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_13.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_14.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_15.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_16.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_17.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_18.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_19.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_20.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_21.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_22.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_23.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_24.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_25.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_26.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_27.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_28.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_29.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_30.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_31.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_32.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_33.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_34.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_35.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_36.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_3/mp_37.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_38.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_39.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_40.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_41.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_42.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_43.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_44.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_45.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_46.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_47.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_48.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_49.golden | 488 ++++++++++++++++++ .../golden/serialization_test_3/mp_50.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_01.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_02.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_03.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_04.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_05.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_06.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_07.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_08.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_09.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_10.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_11.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_12.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_13.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_14.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_4/mp_15.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_16.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_17.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_18.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_19.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_20.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_21.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_22.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_23.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_24.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_25.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_26.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_27.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_28.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_29.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_30.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_31.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_32.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_33.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_34.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_35.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_36.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_37.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_38.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_39.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_40.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_41.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_42.golden | 488 ++++++++++++++++++ 
.../golden/serialization_test_4/mp_43.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_44.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_45.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_46.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_47.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_48.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_49.golden | 488 ++++++++++++++++++ .../golden/serialization_test_4/mp_50.golden | 488 ++++++++++++++++++ unit_test/unit_test.py | 132 +++-- unit_test/unit_test.sh | 59 ++- 203 files changed, 97741 insertions(+), 68 deletions(-) create mode 100644 unit_test/golden/serialization_test_1/mp_01.golden create mode 100644 unit_test/golden/serialization_test_1/mp_02.golden create mode 100644 unit_test/golden/serialization_test_1/mp_03.golden create mode 100644 unit_test/golden/serialization_test_1/mp_04.golden create mode 100644 unit_test/golden/serialization_test_1/mp_05.golden create mode 100644 unit_test/golden/serialization_test_1/mp_06.golden create mode 100644 unit_test/golden/serialization_test_1/mp_07.golden create mode 100644 unit_test/golden/serialization_test_1/mp_08.golden create mode 100644 unit_test/golden/serialization_test_1/mp_09.golden create mode 100644 unit_test/golden/serialization_test_1/mp_10.golden create mode 100644 unit_test/golden/serialization_test_1/mp_11.golden create mode 100644 unit_test/golden/serialization_test_1/mp_12.golden create mode 100644 unit_test/golden/serialization_test_1/mp_13.golden create mode 100644 unit_test/golden/serialization_test_1/mp_14.golden create mode 100644 unit_test/golden/serialization_test_1/mp_15.golden create mode 100644 unit_test/golden/serialization_test_1/mp_16.golden create mode 100644 unit_test/golden/serialization_test_1/mp_17.golden create mode 100644 unit_test/golden/serialization_test_1/mp_18.golden create mode 100644 
unit_test/golden/serialization_test_1/mp_19.golden create mode 100644 unit_test/golden/serialization_test_1/mp_20.golden create mode 100644 unit_test/golden/serialization_test_1/mp_21.golden create mode 100644 unit_test/golden/serialization_test_1/mp_22.golden create mode 100644 unit_test/golden/serialization_test_1/mp_23.golden create mode 100644 unit_test/golden/serialization_test_1/mp_24.golden create mode 100644 unit_test/golden/serialization_test_1/mp_25.golden create mode 100644 unit_test/golden/serialization_test_1/mp_26.golden create mode 100644 unit_test/golden/serialization_test_1/mp_27.golden create mode 100644 unit_test/golden/serialization_test_1/mp_28.golden create mode 100644 unit_test/golden/serialization_test_1/mp_29.golden create mode 100644 unit_test/golden/serialization_test_1/mp_30.golden create mode 100644 unit_test/golden/serialization_test_1/mp_31.golden create mode 100644 unit_test/golden/serialization_test_1/mp_32.golden create mode 100644 unit_test/golden/serialization_test_1/mp_33.golden create mode 100644 unit_test/golden/serialization_test_1/mp_34.golden create mode 100644 unit_test/golden/serialization_test_1/mp_35.golden create mode 100644 unit_test/golden/serialization_test_1/mp_36.golden create mode 100644 unit_test/golden/serialization_test_1/mp_37.golden create mode 100644 unit_test/golden/serialization_test_1/mp_38.golden create mode 100644 unit_test/golden/serialization_test_1/mp_39.golden create mode 100644 unit_test/golden/serialization_test_1/mp_40.golden create mode 100644 unit_test/golden/serialization_test_1/mp_41.golden create mode 100644 unit_test/golden/serialization_test_1/mp_42.golden create mode 100644 unit_test/golden/serialization_test_1/mp_43.golden create mode 100644 unit_test/golden/serialization_test_1/mp_44.golden create mode 100644 unit_test/golden/serialization_test_1/mp_45.golden create mode 100644 unit_test/golden/serialization_test_1/mp_46.golden create mode 100644 
unit_test/golden/serialization_test_1/mp_47.golden create mode 100644 unit_test/golden/serialization_test_1/mp_48.golden create mode 100644 unit_test/golden/serialization_test_1/mp_49.golden create mode 100644 unit_test/golden/serialization_test_1/mp_50.golden create mode 100644 unit_test/golden/serialization_test_2/mp_01.golden create mode 100644 unit_test/golden/serialization_test_2/mp_02.golden create mode 100644 unit_test/golden/serialization_test_2/mp_03.golden create mode 100644 unit_test/golden/serialization_test_2/mp_04.golden create mode 100644 unit_test/golden/serialization_test_2/mp_05.golden create mode 100644 unit_test/golden/serialization_test_2/mp_06.golden create mode 100644 unit_test/golden/serialization_test_2/mp_07.golden create mode 100644 unit_test/golden/serialization_test_2/mp_08.golden create mode 100644 unit_test/golden/serialization_test_2/mp_09.golden create mode 100644 unit_test/golden/serialization_test_2/mp_10.golden create mode 100644 unit_test/golden/serialization_test_2/mp_11.golden create mode 100644 unit_test/golden/serialization_test_2/mp_12.golden create mode 100644 unit_test/golden/serialization_test_2/mp_13.golden create mode 100644 unit_test/golden/serialization_test_2/mp_14.golden create mode 100644 unit_test/golden/serialization_test_2/mp_15.golden create mode 100644 unit_test/golden/serialization_test_2/mp_16.golden create mode 100644 unit_test/golden/serialization_test_2/mp_17.golden create mode 100644 unit_test/golden/serialization_test_2/mp_18.golden create mode 100644 unit_test/golden/serialization_test_2/mp_19.golden create mode 100644 unit_test/golden/serialization_test_2/mp_20.golden create mode 100644 unit_test/golden/serialization_test_2/mp_21.golden create mode 100644 unit_test/golden/serialization_test_2/mp_22.golden create mode 100644 unit_test/golden/serialization_test_2/mp_23.golden create mode 100644 unit_test/golden/serialization_test_2/mp_24.golden create mode 100644 
unit_test/golden/serialization_test_2/mp_25.golden create mode 100644 unit_test/golden/serialization_test_2/mp_26.golden create mode 100644 unit_test/golden/serialization_test_2/mp_27.golden create mode 100644 unit_test/golden/serialization_test_2/mp_28.golden create mode 100644 unit_test/golden/serialization_test_2/mp_29.golden create mode 100644 unit_test/golden/serialization_test_2/mp_30.golden create mode 100644 unit_test/golden/serialization_test_2/mp_31.golden create mode 100644 unit_test/golden/serialization_test_2/mp_32.golden create mode 100644 unit_test/golden/serialization_test_2/mp_33.golden create mode 100644 unit_test/golden/serialization_test_2/mp_34.golden create mode 100644 unit_test/golden/serialization_test_2/mp_35.golden create mode 100644 unit_test/golden/serialization_test_2/mp_36.golden create mode 100644 unit_test/golden/serialization_test_2/mp_37.golden create mode 100644 unit_test/golden/serialization_test_2/mp_38.golden create mode 100644 unit_test/golden/serialization_test_2/mp_39.golden create mode 100644 unit_test/golden/serialization_test_2/mp_40.golden create mode 100644 unit_test/golden/serialization_test_2/mp_41.golden create mode 100644 unit_test/golden/serialization_test_2/mp_42.golden create mode 100644 unit_test/golden/serialization_test_2/mp_43.golden create mode 100644 unit_test/golden/serialization_test_2/mp_44.golden create mode 100644 unit_test/golden/serialization_test_2/mp_45.golden create mode 100644 unit_test/golden/serialization_test_2/mp_46.golden create mode 100644 unit_test/golden/serialization_test_2/mp_47.golden create mode 100644 unit_test/golden/serialization_test_2/mp_48.golden create mode 100644 unit_test/golden/serialization_test_2/mp_49.golden create mode 100644 unit_test/golden/serialization_test_2/mp_50.golden create mode 100644 unit_test/golden/serialization_test_3/mp_01.golden create mode 100644 unit_test/golden/serialization_test_3/mp_02.golden create mode 100644 
unit_test/golden/serialization_test_3/mp_03.golden create mode 100644 unit_test/golden/serialization_test_3/mp_04.golden create mode 100644 unit_test/golden/serialization_test_3/mp_05.golden create mode 100644 unit_test/golden/serialization_test_3/mp_06.golden create mode 100644 unit_test/golden/serialization_test_3/mp_07.golden create mode 100644 unit_test/golden/serialization_test_3/mp_08.golden create mode 100644 unit_test/golden/serialization_test_3/mp_09.golden create mode 100644 unit_test/golden/serialization_test_3/mp_10.golden create mode 100644 unit_test/golden/serialization_test_3/mp_11.golden create mode 100644 unit_test/golden/serialization_test_3/mp_12.golden create mode 100644 unit_test/golden/serialization_test_3/mp_13.golden create mode 100644 unit_test/golden/serialization_test_3/mp_14.golden create mode 100644 unit_test/golden/serialization_test_3/mp_15.golden create mode 100644 unit_test/golden/serialization_test_3/mp_16.golden create mode 100644 unit_test/golden/serialization_test_3/mp_17.golden create mode 100644 unit_test/golden/serialization_test_3/mp_18.golden create mode 100644 unit_test/golden/serialization_test_3/mp_19.golden create mode 100644 unit_test/golden/serialization_test_3/mp_20.golden create mode 100644 unit_test/golden/serialization_test_3/mp_21.golden create mode 100644 unit_test/golden/serialization_test_3/mp_22.golden create mode 100644 unit_test/golden/serialization_test_3/mp_23.golden create mode 100644 unit_test/golden/serialization_test_3/mp_24.golden create mode 100644 unit_test/golden/serialization_test_3/mp_25.golden create mode 100644 unit_test/golden/serialization_test_3/mp_26.golden create mode 100644 unit_test/golden/serialization_test_3/mp_27.golden create mode 100644 unit_test/golden/serialization_test_3/mp_28.golden create mode 100644 unit_test/golden/serialization_test_3/mp_29.golden create mode 100644 unit_test/golden/serialization_test_3/mp_30.golden create mode 100644 
unit_test/golden/serialization_test_3/mp_31.golden create mode 100644 unit_test/golden/serialization_test_3/mp_32.golden create mode 100644 unit_test/golden/serialization_test_3/mp_33.golden create mode 100644 unit_test/golden/serialization_test_3/mp_34.golden create mode 100644 unit_test/golden/serialization_test_3/mp_35.golden create mode 100644 unit_test/golden/serialization_test_3/mp_36.golden create mode 100644 unit_test/golden/serialization_test_3/mp_37.golden create mode 100644 unit_test/golden/serialization_test_3/mp_38.golden create mode 100644 unit_test/golden/serialization_test_3/mp_39.golden create mode 100644 unit_test/golden/serialization_test_3/mp_40.golden create mode 100644 unit_test/golden/serialization_test_3/mp_41.golden create mode 100644 unit_test/golden/serialization_test_3/mp_42.golden create mode 100644 unit_test/golden/serialization_test_3/mp_43.golden create mode 100644 unit_test/golden/serialization_test_3/mp_44.golden create mode 100644 unit_test/golden/serialization_test_3/mp_45.golden create mode 100644 unit_test/golden/serialization_test_3/mp_46.golden create mode 100644 unit_test/golden/serialization_test_3/mp_47.golden create mode 100644 unit_test/golden/serialization_test_3/mp_48.golden create mode 100644 unit_test/golden/serialization_test_3/mp_49.golden create mode 100644 unit_test/golden/serialization_test_3/mp_50.golden create mode 100644 unit_test/golden/serialization_test_4/mp_01.golden create mode 100644 unit_test/golden/serialization_test_4/mp_02.golden create mode 100644 unit_test/golden/serialization_test_4/mp_03.golden create mode 100644 unit_test/golden/serialization_test_4/mp_04.golden create mode 100644 unit_test/golden/serialization_test_4/mp_05.golden create mode 100644 unit_test/golden/serialization_test_4/mp_06.golden create mode 100644 unit_test/golden/serialization_test_4/mp_07.golden create mode 100644 unit_test/golden/serialization_test_4/mp_08.golden create mode 100644 
unit_test/golden/serialization_test_4/mp_09.golden create mode 100644 unit_test/golden/serialization_test_4/mp_10.golden create mode 100644 unit_test/golden/serialization_test_4/mp_11.golden create mode 100644 unit_test/golden/serialization_test_4/mp_12.golden create mode 100644 unit_test/golden/serialization_test_4/mp_13.golden create mode 100644 unit_test/golden/serialization_test_4/mp_14.golden create mode 100644 unit_test/golden/serialization_test_4/mp_15.golden create mode 100644 unit_test/golden/serialization_test_4/mp_16.golden create mode 100644 unit_test/golden/serialization_test_4/mp_17.golden create mode 100644 unit_test/golden/serialization_test_4/mp_18.golden create mode 100644 unit_test/golden/serialization_test_4/mp_19.golden create mode 100644 unit_test/golden/serialization_test_4/mp_20.golden create mode 100644 unit_test/golden/serialization_test_4/mp_21.golden create mode 100644 unit_test/golden/serialization_test_4/mp_22.golden create mode 100644 unit_test/golden/serialization_test_4/mp_23.golden create mode 100644 unit_test/golden/serialization_test_4/mp_24.golden create mode 100644 unit_test/golden/serialization_test_4/mp_25.golden create mode 100644 unit_test/golden/serialization_test_4/mp_26.golden create mode 100644 unit_test/golden/serialization_test_4/mp_27.golden create mode 100644 unit_test/golden/serialization_test_4/mp_28.golden create mode 100644 unit_test/golden/serialization_test_4/mp_29.golden create mode 100644 unit_test/golden/serialization_test_4/mp_30.golden create mode 100644 unit_test/golden/serialization_test_4/mp_31.golden create mode 100644 unit_test/golden/serialization_test_4/mp_32.golden create mode 100644 unit_test/golden/serialization_test_4/mp_33.golden create mode 100644 unit_test/golden/serialization_test_4/mp_34.golden create mode 100644 unit_test/golden/serialization_test_4/mp_35.golden create mode 100644 unit_test/golden/serialization_test_4/mp_36.golden create mode 100644 
unit_test/golden/serialization_test_4/mp_37.golden create mode 100644 unit_test/golden/serialization_test_4/mp_38.golden create mode 100644 unit_test/golden/serialization_test_4/mp_39.golden create mode 100644 unit_test/golden/serialization_test_4/mp_40.golden create mode 100644 unit_test/golden/serialization_test_4/mp_41.golden create mode 100644 unit_test/golden/serialization_test_4/mp_42.golden create mode 100644 unit_test/golden/serialization_test_4/mp_43.golden create mode 100644 unit_test/golden/serialization_test_4/mp_44.golden create mode 100644 unit_test/golden/serialization_test_4/mp_45.golden create mode 100644 unit_test/golden/serialization_test_4/mp_46.golden create mode 100644 unit_test/golden/serialization_test_4/mp_47.golden create mode 100644 unit_test/golden/serialization_test_4/mp_48.golden create mode 100644 unit_test/golden/serialization_test_4/mp_49.golden create mode 100644 unit_test/golden/serialization_test_4/mp_50.golden diff --git a/unit_test/format.sh b/unit_test/format.sh index 375f128..b09f5b2 100755 --- a/unit_test/format.sh +++ b/unit_test/format.sh @@ -1,11 +1,21 @@ #!/bin/bash new_name="" +old_name="" + +#for i in {1..50..1}; do +# printf -v new_name "mp_%02d.xml" $i +# if [ -f "$1/mp_$i.xml" ]; then +# echo "mv "$1/mp_$i.xml" $1/$new_name" +# mv "$1/mp_$i.xml" $1/$new_name +# fi +#done for i in {1..50..1}; do - printf -v new_name "mp_%02d.xml" $i - if [ -f "$1/mp_$i.xml" ]; then - echo "mv "$1/mp_$i.xml" $1/$new_name" - mv "$1/mp_$i.xml" $1/$new_name + printf -v old_name "mp_%02d.out" $i + printf -v new_name "mp_%02d.golden" $i + if [ -f "$1/$old_name" ]; then + echo "mv $1/$old_name $1/$new_name" + mv $1/$old_name $1/$new_name fi done diff --git a/unit_test/golden/serialization_test_1/mp_01.golden b/unit_test/golden/serialization_test_1/mp_01.golden new file mode 100644 index 0000000..e67363b --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_01.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the 
target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 66.6424 W + Total Leakage = 11.3508 W + Peak Dynamic = 55.2916 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.3271 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.621207 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0156185 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 33.0603 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.69027 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.621207 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0711184 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00181336 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0010276 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0521097 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000142962 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 
0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.0517704 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 5.33859e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000142962 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00671513 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00767836 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.027451 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.018204 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 5.10827e-06 W + + Free List: + Area = 0.0548701 mm^2 + 
Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00181409 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000315992 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 3.61486e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 1.15077e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0191419 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0033865 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0046604 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0093208 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.00443541 W + + Itlb: + Area = 0.012108 mm^2 + 
Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.499054 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.00876295 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00896962 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000206673 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0242591 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0077717 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W 
+ Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0164874 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0847972 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0250598 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 6.34388e-06 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.26508 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.69027 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.645073 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.29073 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.29487 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.754477 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0156185 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0156185 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.00869495 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00690027 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 2.32719e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_02.golden b/unit_test/golden/serialization_test_1/mp_02.golden new file mode 100644 index 0000000..2128f02 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_02.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 16.6054 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.787 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.787 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.88501 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0.489014 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.240996 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power 
gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.275565 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.814103 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.327829 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0473035 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0111351 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.925964 W + + Data Cache: + Area = 1.50084 
mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.450928 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.122743 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.245487 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.89442 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.545944 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.552165 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + 
Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.895357 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.395734 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.499624 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.671081 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.00573 W + + L2 + Area = 0.0189192 
mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000483485 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + 
Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0872393 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0692327 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000233495 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_03.golden b/unit_test/golden/serialization_test_1/mp_03.golden new file mode 100644 index 0000000..1c1b265 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_03.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 10.2514 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.52704 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.52704 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0380246 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0869382 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.256327 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.234965 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold 
Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.09235 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.588009 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W 
+ Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.555538 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 
0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_04.golden b/unit_test/golden/serialization_test_1/mp_04.golden new file mode 100644 index 0000000..78a632a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_04.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 68.2671 W + Total Leakage = 11.3508 W + Peak Dynamic = 56.9163 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.43121 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.422369 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 2.28001e-05 W + + Total First 
Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0224533 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 34.685 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.98636 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.422369 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.00049741 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.000232061 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.000265349 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + 
Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 4.45648e-05 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 7.15121e-06 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 4.58982e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 1.40125e-05 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 5.22347e-06 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 3.24798e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 1.03398e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + 
LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.421074 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000185697 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000185697 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 
0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.00218017 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.000969264 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0012109 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000753153 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 2.28001e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.67124 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.98636 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.656233 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.33093 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.70103 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.999197 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0224533 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0224533 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0125 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00991991 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 3.3456e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_05.golden b/unit_test/golden/serialization_test_1/mp_05.golden new file mode 100644 index 0000000..9d234a7 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_05.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.78884 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.8477 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.8477 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.80466 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.54048 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.445233 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0178965 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0611669 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0139766 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000357725 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00184973 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.234786 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.26758 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.2884 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.264263 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0365787 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.678954 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.233033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0445303 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.40139 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.501975 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.434921 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.2237 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + 
Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_06.golden b/unit_test/golden/serialization_test_1/mp_06.golden new file mode 100644 index 0000000..9ad0d0d --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_06.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.1728 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26296 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W 
+ Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26296 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.74726 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + 
Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.266675 W + + Renaming Unit: + 
Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.13359 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.280303 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0338694 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.632766 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power 
gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.219416 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.507332 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.24182 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.573565 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.579785 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.981291 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.31712 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.664171 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.614712 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.911446 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_07.golden b/unit_test/golden/serialization_test_1/mp_07.golden new file mode 100644 index 0000000..e703960 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_07.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 14.2275 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.99945 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 11.1023 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.99945 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.21209 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0144815 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.33805 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.108463 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 
W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0127547 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00261057 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.170886 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.117483 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.14477 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0373251 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.043546 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.474934 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0534333 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.421501 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 11.1023 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 6.09117 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_08.golden b/unit_test/golden/serialization_test_1/mp_08.golden new file mode 100644 index 0000000..31b3248 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_08.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 14.0307 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.19815 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + 
Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.19815 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.53299 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + 
Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.279866 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + 
Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.320011 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.50849 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.853503 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.115552 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0261545 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.579508 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 
1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.188319 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.32042 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.25674 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.479007 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.485228 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power 
gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.08851 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.344812 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.743703 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.727449 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.10002 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W 
+ Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + 
Gate Leakage = 0.000537813 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_09.golden b/unit_test/golden/serialization_test_1/mp_09.golden new file mode 100644 index 0000000..e58fd38 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_09.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.19965 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.25812 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.25812 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.46319 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.163255 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.186673 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.385997 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.31268 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.85559 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.448649 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.45487 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.599173 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.275201 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.323972 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.445606 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000337218 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_10.golden b/unit_test/golden/serialization_test_1/mp_10.golden new file mode 100644 index 0000000..a4e6d18 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_10.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.6402 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.91596 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.91596 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.918807 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0.299089 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00417118 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with 
power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.31214 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.125851 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0125848 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.256002 W + + Data Cache: + Area = 
1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.3222 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0400624 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0462832 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + 
Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.591916 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0504899 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.541427 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.107395 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0628584 W + + L2 + Area = 
0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + 
Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_11.golden b/unit_test/golden/serialization_test_1/mp_11.golden new file mode 100644 index 0000000..61cde67 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_11.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 5.01467 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.07334 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.07334 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.186709 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0621925 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0711135 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.215106 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.164749 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold 
Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.34856 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.109736 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.115957 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.254788 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0983534 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 
0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.156434 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.220004 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area 
= 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_12.golden b/unit_test/golden/serialization_test_1/mp_12.golden new file mode 100644 index 0000000..5b66a0a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_12.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + 
Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 
0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 
W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 
0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_13.golden b/unit_test/golden/serialization_test_1/mp_13.golden new file mode 100644 index 0000000..099021d --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_13.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 71.3255 W + Total Leakage = 11.3508 W + Peak Dynamic = 59.9747 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 5.02844 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.565389 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 2.47988e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0315446 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 37.7434 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 4.43148 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.565389 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0493763 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00177215 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.000669499 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0339503 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 9.31421e-05 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + 
Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.0337292 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 3.47817e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 9.31421e-05 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00605771 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00692664 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.021393 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0185234 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 4.99218e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic 
= 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0023433 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000506504 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 3.5327e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 1.12462e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.00652615 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.00140235 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.00170793 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.00341586 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 
0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.485666 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.00880617 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00900815 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000201976 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0204655 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0072788 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold 
Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0131867 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0838467 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0234698 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00242798 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 2.47988e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 9.43585 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 4.43148 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.667866 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.37284 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 2.46564 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 1.39078 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0315446 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0315446 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0175612 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0139364 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.70023e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_14.golden b/unit_test/golden/serialization_test_1/mp_14.golden new file mode 100644 index 0000000..451d86b --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_14.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 16.5547 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.70494 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.70494 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.88245 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0.489014 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.2099 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power 
gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.240008 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.06663 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.319946 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0501602 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0123529 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.430637 W + + Data Cache: + Area = 1.50084 
mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.427227 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.89747 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.521807 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.528028 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + 
Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.9173 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.345191 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.572108 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.577133 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.848588 W + + L2 + Area = 0.0189192 
mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000528169 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + 
Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.104687 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0830792 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000280194 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_15.golden b/unit_test/golden/serialization_test_1/mp_15.golden new file mode 100644 index 0000000..a69a522 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_15.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 6.14645 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.26798 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.26798 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.958169 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00417118 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0544184 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0622243 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.31214 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.125851 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0125848 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.256002 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.63486 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0669366 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0731575 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.676831 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0839075 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 
W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.592924 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.182553 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.188575 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W 
+ Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_16.golden b/unit_test/golden/serialization_test_1/mp_16.golden new file mode 100644 index 0000000..af5a265 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_16.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.69349 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.72081 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.72081 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.120056 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0310962 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0355567 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 
0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.215106 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.164749 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime 
Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.08739 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.082862 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0890828 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + 
Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.22137 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0649358 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.156434 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.126184 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0942875 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime 
Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_17.golden b/unit_test/golden/serialization_test_1/mp_17.golden new file mode 100644 index 0000000..b15fbca --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_17.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.219 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.43216 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.43216 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.944284 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.172905 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.153069 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 
W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0162105 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0957967 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.21884 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.103515 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.109736 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.244462 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.106901 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.137561 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.238921 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.282863 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000337218 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage 
= 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_18.golden b/unit_test/golden/serialization_test_1/mp_18.golden new file mode 100644 index 0000000..61418de --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_18.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.81676 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.93829 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W 
+ Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.93829 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.124754 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch 
Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 
0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.300552 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.18611 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.186912 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.13997 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.082862 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0890828 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate 
Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.238714 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0605873 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.178126 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.107395 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 
0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 
W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_19.golden b/unit_test/golden/serialization_test_1/mp_19.golden new file mode 100644 index 0000000..4d41587 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_19.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.40875 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.501795 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0284919 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.501795 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0151484 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00706733 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00808108 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0013572 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000217787 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000139781 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000426744 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000159079 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 9.89156e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000314894 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating 
= 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.485289 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00565532 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00565532 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.066396 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0295185 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 
1.75973e-05 W + Runtime Dynamic = 0.0368775 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + 
Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0284919 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0284919 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0158617 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold 
Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0125878 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.24537e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_20.golden b/unit_test/golden/serialization_test_1/mp_20.golden new file mode 100644 index 0000000..13b6c0e --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_20.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.78728 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.89164 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device 
Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.89164 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.505136 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power 
gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.116611 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.133338 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with 
power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.431438 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.328153 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0484122 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.011541 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.106477 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 
mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.79703 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.197326 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.203547 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 
0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.43503 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.16185 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.27318 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.332869 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.440009 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248487 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 1.87027 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_21.golden b/unit_test/golden/serialization_test_1/mp_21.golden new file mode 100644 index 0000000..812292b --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_21.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.77308 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.39072 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.39072 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.305428 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + 
Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.514165 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.292118 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.32042 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.95845 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.223702 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.229923 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.40143 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.111491 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.289939 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.220132 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.251433 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_22.golden b/unit_test/golden/serialization_test_1/mp_22.golden new file mode 100644 index 0000000..a73d4b7 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_22.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.05264 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.17399 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime 
Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.17399 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.294497 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0406352 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0359896 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0034558 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000580916 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.154538 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 
0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.684321 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0427996 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0490205 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.122191 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0511154 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0710752 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.107395 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_23.golden b/unit_test/golden/serialization_test_1/mp_23.golden new file mode 100644 index 0000000..07e511a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_23.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.89872 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.656468 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.21091 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.656468 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0273439 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0442156 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0106807 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage 
with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.547526 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.08591 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate 
Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0534396 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 
0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.21091 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 4.31128 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + 
Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_24.golden b/unit_test/golden/serialization_test_1/mp_24.golden new file mode 100644 index 0000000..8aec86d --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_24.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 66.8718 W + Total Leakage = 11.3508 W + Peak Dynamic = 55.521 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.45885 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.10988 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device 
Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020005 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 33.2897 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.32897 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.10988 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.292948 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00580662 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power 
gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00438735 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.222483 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000610378 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.221034 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000227931 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000610378 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0281189 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0321524 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0858913 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0734505 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 3.27147e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0100284 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0022828 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 2.31505e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 7.36985e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0223848 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 
W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0074616 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0149232 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.708621 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0290661 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0303896 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00132359 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + 
Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0897041 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.03417 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0555341 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.13378 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.106993 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime 
Dynamic = 3.11207e-05 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.32243 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.32897 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.35222 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.540888 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020005 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020005 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0111369 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00883821 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 2.98079e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_25.golden b/unit_test/golden/serialization_test_1/mp_25.golden new file mode 100644 index 0000000..2cb2652 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_25.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 6.59138 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.65023 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.65023 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.85418 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.489014 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.418272 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.216686 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0250399 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00504614 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.101464 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.16917 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.119441 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.125662 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.45995 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0832428 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.376707 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000337218 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_26.golden b/unit_test/golden/serialization_test_1/mp_26.golden new file mode 100644 index 0000000..44acd19 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_26.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.6512 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.727959 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime 
Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.044773 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.727959 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.261813 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 
0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00490966 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.248969 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000683042 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.247348 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000255066 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000683042 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00370193 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00423294 W + + Renaming Unit: + Area = 
0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.000710914 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000114079 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 7.32186e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000223533 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 8.33269e-05 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 5.18129e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000164944 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power 
gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.453672 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00296231 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00296231 W + + Instruction Scheduler: + 
Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0347789 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0154621 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0193168 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0.0117631 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.044773 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.044773 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0249255 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0197808 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 6.67129e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_27.golden b/unit_test/golden/serialization_test_1/mp_27.golden new file mode 100644 index 0000000..d491a2b --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_27.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 13.99 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.20294 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.20294 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.4914 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.178803 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.204451 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.587747 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.332792 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 
W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0344685 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00626393 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.307013 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.68313 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.354838 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.361059 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.590973 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.247955 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.343018 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.483185 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.691442 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage 
= 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_28.golden b/unit_test/golden/serialization_test_1/mp_28.golden new file mode 100644 index 0000000..bd69a8f --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_28.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.78923 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.87943 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W 
+ Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.87943 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.94813 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + 
+ Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.132159 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + 
Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.151116 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.513531 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.407536 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0515283 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0111351 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.329169 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 
1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.06175 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.152536 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.158757 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power 
gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.593879 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.196013 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.397867 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.389238 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.534296 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 
W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 
0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_29.golden b/unit_test/golden/serialization_test_1/mp_29.golden new file mode 100644 index 0000000..2370f39 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_29.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.18289 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.27271 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.27271 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0487052 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.129661 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.149525 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.117483 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold 
Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.864709 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.317648 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 
W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.285178 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 
0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_30.golden b/unit_test/golden/serialization_test_1/mp_30.golden new file mode 100644 index 0000000..e0fd086 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_30.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 66.15 W + Total Leakage = 11.3508 W + Peak Dynamic = 54.7992 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.28479 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.47514 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0174117 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 32.5679 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.79224 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.47514 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0151029 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0101078 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00190931 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00143964 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00164614 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 
W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0230439 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0200857 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 2.84739e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00236198 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000483438 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 2.01495e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 6.4145e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: 
+ Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.432418 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00115201 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00115201 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W 
+ Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0135251 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.00601302 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.00751209 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00457453 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.14197 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.79224 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.657702 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.33622 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.17176 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.798313 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0174117 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0174117 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.00969326 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00769252 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 2.59439e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_31.golden b/unit_test/golden/serialization_test_1/mp_31.golden new file mode 100644 index 0000000..d4ce1dc --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_31.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 8.0771 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.1673 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating 
= 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.1673 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.91876 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.132159 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.151116 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.610077 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.351154 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W 
+ Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0376247 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.164749 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.34001 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.20504 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.211261 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.648744 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.190562 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.458182 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.389238 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.534296 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_32.golden b/unit_test/golden/serialization_test_1/mp_32.golden new file mode 100644 index 0000000..5ab416e --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_32.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + 
+ Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime 
Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage 
with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 2.5314 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_33.golden b/unit_test/golden/serialization_test_1/mp_33.golden new file mode 100644 index 0000000..488ea3f --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_33.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 67.653 W + Total Leakage = 11.3508 W + Peak Dynamic = 56.3022 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.4273 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.591551 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 3.11333e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0357697 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 34.0709 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.79995 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.591551 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0638988 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00812742 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00112068 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0426224 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000116934 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + 
Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.0423448 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 4.36662e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000116934 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00507004 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00579729 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0247868 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0180536 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 4.17824e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic 
= 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00177015 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000302584 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 2.95672e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 9.41258e-06 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0149441 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.00234741 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0038119 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.00762381 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.00290247 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic 
= 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.479616 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.00782342 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00799247 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000169045 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0182329 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.00616577 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + 
Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0120671 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0810484 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0187892 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00540278 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 3.11333e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.51772 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.79995 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.645792 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.29332 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.54751 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.860838 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0357697 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0357697 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0199133 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0158031 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 5.32978e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_34.golden b/unit_test/golden/serialization_test_1/mp_34.golden new file mode 100644 index 0000000..71b2ac2 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_34.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 19.2308 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.4124 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.4124 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.57716 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0.489014 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.264318 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power 
gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.302232 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.0952 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.441016 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0467537 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00869951 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.11709 W + + Data Cache: + Area = 1.50084 
mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.49412 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.157813 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.315626 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.373823 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.24866 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.605415 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.611636 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold 
Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.969018 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.384036 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.584983 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.68987 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.03716 W + + L2 + Area = 0.0189192 mm^2 + Peak 
Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000483485 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold 
Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0872393 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0692327 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000233495 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_35.golden b/unit_test/golden/serialization_test_1/mp_35.golden new file mode 100644 index 0000000..0f1521a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_35.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 12.3792 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.65481 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.65481 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.545995 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00417118 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.101063 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.115559 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.434987 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.197676 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 
W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0196663 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00342243 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.371891 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.16843 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.186377 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.192598 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.696702 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.13764 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.559062 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.408579 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + 
Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_36.golden b/unit_test/golden/serialization_test_1/mp_36.golden new file mode 100644 index 0000000..d0eebd2 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_36.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.53097 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.62098 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 
0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.62098 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0816172 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 
0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.000869963 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 
2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.257829 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.181567 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate 
Leakage = 0.00128629 W + Runtime Dynamic = 0.117483 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.939753 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0311043 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0373251 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.233419 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0424183 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.191001 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_37.golden b/unit_test/golden/serialization_test_1/mp_37.golden new file mode 100644 index 0000000..7b77422 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_37.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 67.4612 W + Total Leakage = 11.3508 W + Peak Dynamic = 56.1104 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 4.25645 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.436626 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.80996e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020795 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 33.8791 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 3.79901 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.436626 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0130286 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.000244319 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0123894 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 3.39902e-05 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 0.0123088 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 1.26928e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 3.39902e-05 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.000184219 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.000210644 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 3.53773e-05 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 5.67691e-06 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 3.64358e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak 
Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 1.11237e-05 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 4.1466e-06 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 2.57837e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 8.20813e-06 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.420624 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000147414 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000147414 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0017307 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.000769439 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.000961263 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00293829 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.80996e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 8.46977 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 3.79901 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.647316 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.29881 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 1.49956 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.852887 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020795 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.020795 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage 
= 1.19331e-05 W + Runtime Dynamic = 0.0115768 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00918727 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 3.09851e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_38.golden b/unit_test/golden/serialization_test_1/mp_38.golden new file mode 100644 index 0000000..27bb40c --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_38.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.2347 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26217 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 
W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26217 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.49985 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W 
+ + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0618617 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W 
+ Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.963773 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.556798 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0550736 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00951137 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.339055 W + + Data Cache: + Area = 1.50084 mm^2 + Peak 
Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.22099 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.286409 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.29263 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage 
with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.868218 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.264191 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.604028 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.558343 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.817159 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic 
= 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248934 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power 
gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_39.golden b/unit_test/golden/serialization_test_1/mp_39.golden new file mode 100644 index 0000000..cdae9ab --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_39.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.7908 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.88101 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.88101 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.69698 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86397 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.619272 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.352776 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0431682 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00910544 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.721298 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.24706 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.70976 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.409333 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.415554 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.2618 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.354955 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.906841 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.595922 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.880017 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_40.golden b/unit_test/golden/serialization_test_1/mp_40.golden new file mode 100644 index 0000000..c9990df --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_40.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 11.1884 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.29276 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.29276 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.17721 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power 
gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.405659 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.352776 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0431682 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00910544 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.685488 W + + Data Cache: + Area = 
1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.475071 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.99955 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.198073 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.204294 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + 
Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.478652 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.191188 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.287464 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.565725 W + + L2 + Area = 0.0189192 
mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248487 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + 
Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_41.golden b/unit_test/golden/serialization_test_1/mp_41.golden new file mode 100644 index 0000000..066cdb4 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_41.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 18.1803 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.40795 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.219388 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 11.5528 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.40795 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.52308 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.147707 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.168894 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.693716 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.321665 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W 
+ Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0262381 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00342243 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.382247 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.57026 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.348618 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.354838 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.557029 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.237113 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.319916 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.565725 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0250397 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 11.5528 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.33748 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 4.78523 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 5.43004 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.219388 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.219388 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage 
= 1.19331e-05 W + Runtime Dynamic = 0.122135 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0969257 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000326893 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_42.golden b/unit_test/golden/serialization_test_1/mp_42.golden new file mode 100644 index 0000000..4a61808 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_42.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 13.6727 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.7941 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W 
+ Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.7941 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.92993 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + 
Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.064065 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 4.18267 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0114751 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 4.15544 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00428511 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0114751 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.318736 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate 
Leakage = 0.000402065 W + Runtime Dynamic = 0.364456 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.05374 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.9142 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.114403 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0245308 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.02548 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W 
+ Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.604648 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.78491 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.474777 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.480998 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 
W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.982927 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.378023 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.604904 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.783818 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.19431 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold 
Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate 
Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_43.golden b/unit_test/golden/serialization_test_1/mp_43.golden new file mode 100644 index 0000000..de04992 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_43.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 17.4842 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 14.6055 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 14.6055 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.90282 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0598938 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.65984 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 
W + Runtime Dynamic = 3.63601 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00374947 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.551958 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.631132 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.971911 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.849286 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage 
= 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.101139 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0208774 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 2.36822 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 1.68437 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.227952 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.455904 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 6.36256 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 1.16305 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 1.16928 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.36574 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.69029 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 
0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.675447 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.3475 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 2.13718 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 
0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_44.golden b/unit_test/golden/serialization_test_1/mp_44.golden new file mode 100644 index 0000000..1283b06 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_44.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 15.5166 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.71537 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device 
Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.71537 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.91977 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 
0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0495835 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.13701 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00860633 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 3.11658 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00321383 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00860633 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.342059 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.391124 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 
0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.293879 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.253734 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0324611 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.45736 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 
1.03653 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.04436 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.842804 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.849025 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 
W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.673472 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.438915 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.234557 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.858976 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.32003 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 1.87027 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_45.golden b/unit_test/golden/serialization_test_1/mp_45.golden new file mode 100644 index 0000000..d2c68f2 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_45.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.44924 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.57078 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.57078 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W 
+ + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage 
with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate 
Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.552624 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0490205 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0552413 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0847106 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0441453 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W 
+ Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_46.golden b/unit_test/golden/serialization_test_1/mp_46.golden new file mode 100644 index 0000000..bf0f547 --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_46.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 9.23409 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + 
Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W 
+ + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic 
= 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + 
Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + 
Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 
0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating 
= 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_47.golden b/unit_test/golden/serialization_test_1/mp_47.golden new file mode 100644 index 0000000..5b66a0a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_47.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 
W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage 
with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate 
Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime 
Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 
0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate 
Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_48.golden b/unit_test/golden/serialization_test_1/mp_48.golden new file mode 100644 index 0000000..5ab416e --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_48.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch 
Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 
0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + 
Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + 
Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 
0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 
0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_49.golden b/unit_test/golden/serialization_test_1/mp_49.golden new file mode 100644 index 0000000..5b66a0a --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_49.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 62.3667 W + Total Leakage = 11.3508 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 
W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage 
with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate 
Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime 
Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 
0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate 
Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_1/mp_50.golden b/unit_test/golden/serialization_test_1/mp_50.golden new file mode 100644 index 0000000..5ab416e --- /dev/null +++ b/unit_test/golden/serialization_test_1/mp_50.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 34.723 mm^2 + Peak Power = 86.7769 W + Total Leakage = 11.3508 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 11.3359 W + Subthreshold Leakage with power gating = 3.78612 W + Gate Leakage = 0.0148585 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 20.0589 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.90621 W + Subthreshold Leakage with power gating = 0.94727 W + Gate Leakage = 0.00358496 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch 
Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 
0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + 
Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + 
Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 
0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 5.01472 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.976552 W + Subthreshold Leakage with power gating = 0.236817 W + Gate Leakage = 0.00089624 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.64714 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.305948 W + Subthreshold Leakage with power gating = 0.0734276 W + Gate Leakage = 0.00028235 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 3.13742 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.582762 W + Subthreshold Leakage with power gating = 0.139863 W + Gate Leakage = 0.000537813 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 
0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_01.golden b/unit_test/golden/serialization_test_2/mp_01.golden new file mode 100644 index 0000000..a3dded2 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_01.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 16.872 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.13084 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 11.5528 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.13084 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.74432 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.489014 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.06897 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 2.06145 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00124657 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0621925 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0711135 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.233856 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0543512 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 
0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00661204 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00139278 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.249865 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 
0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.7955 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.140094 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.146315 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.714091 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.150973 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage 
= 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.563119 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.220004 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000483485 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 11.5528 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.33748 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 4.78523 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 5.43004 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.104687 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0830792 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000280194 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_02.golden b/unit_test/golden/serialization_test_2/mp_02.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_02.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 
W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime 
Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage 
with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 
0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_03.golden b/unit_test/golden/serialization_test_2/mp_03.golden new file mode 100644 index 0000000..1a692cb --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_03.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.92215 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.45569 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000201001 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0659813 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 4.40027 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.45569 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.233253 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.128688 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W 
+ Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0409161 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0467852 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.141799 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0662375 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 8.09258e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00662358 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00116033 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 5.72669e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000182307 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0999236 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0276865 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0553729 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0421606 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.925457 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0694118 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0726859 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00327413 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.158247 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.061249 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0969985 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.148929 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.132333 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0131018 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000201001 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 4.40027 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.740085 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.63302 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 1.02716 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0659813 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0659813 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0367323 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak 
Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0291506 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.83138e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_04.golden b/unit_test/golden/serialization_test_2/mp_04.golden new file mode 100644 index 0000000..628c8f0 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_04.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.88329 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.00482 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.00482 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0380246 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage 
= 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold 
Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0869382 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.197585 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0.176224 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.628872 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic 
= 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.124533 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0920626 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_05.golden b/unit_test/golden/serialization_test_2/mp_05.golden new file mode 100644 index 0000000..44ba12d --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_05.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.60706 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.35748 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.113968 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.13561 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.35748 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.731477 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.222279 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.018746 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.475304 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00130399 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 0.472209 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000486944 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00130399 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00706733 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00808108 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.118259 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.106064 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000139781 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0100061 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00163519 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 9.89156e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000314894 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.485289 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00565532 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00565532 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.066396 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0295185 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating 
= 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0368775 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0224568 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 
mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.13561 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.05207 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.757 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.32654 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.113968 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.113968 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0634468 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold 
Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.050351 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000169815 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_06.golden b/unit_test/golden/serialization_test_2/mp_06.golden new file mode 100644 index 0000000..c87faf4 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_06.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.24988 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.30855 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + 
Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.30855 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.32735 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 
W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.773504 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.124385 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.142227 W + + Renaming Unit: + Area = 0.166931 mm^2 + 
Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.362447 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.170782 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0171493 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.302469 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + 
Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.20914 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.216735 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.222956 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.649342 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.187104 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.462238 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.351659 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.471438 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate 
Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000337218 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_07.golden b/unit_test/golden/serialization_test_2/mp_07.golden new file mode 100644 index 0000000..b5eae58 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_07.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.29302 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.568826 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.568826 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + 
Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0587414 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage 
with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + 
Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage 
= 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_08.golden b/unit_test/golden/serialization_test_2/mp_08.golden new file mode 100644 index 0000000..425a7a6 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_08.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 64.7726 W + Total Leakage = 8.05706 W + Peak Dynamic = 56.7155 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 4.32343 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.476045 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.66044e-05 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0218025 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 34.4842 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 3.82557 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.476045 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0283446 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.000593285 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 
W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00067241 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0227319 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 6.23647e-05 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.0225839 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 2.32886e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 6.23647e-05 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00202802 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00231892 W + + Renaming Unit: + Area = 0.166931 
mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00679415 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.00590101 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 3.34259e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000726086 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000153822 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 2.36537e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 7.53006e-06 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 
0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.438206 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.000973705 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00110894 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000135236 W + + Instruction Scheduler: 
+ Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.00742163 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.00251425 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.00490737 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0739002 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.00683243 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate 
Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00270068 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.66044e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 8.62106 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 3.82557 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.64448 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.28859 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 1.65085 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.892498 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0218025 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0218025 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0121376 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00963237 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 3.24863e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_09.golden b/unit_test/golden/serialization_test_2/mp_09.golden new file mode 100644 index 0000000..1715ea7 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_09.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.90672 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.49322 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.49322 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.83181 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0932887 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.10667 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.254249 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0359896 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 
0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0034558 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000580916 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.217353 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 
0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.03141 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.177419 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.18364 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.669075 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.174385 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage 
= 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.494691 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.2765 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345721 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248934 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_10.golden b/unit_test/golden/serialization_test_2/mp_10.golden new file mode 100644 index 0000000..20e44ed --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_10.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 70.8262 W + Total Leakage = 8.05706 W + Peak Dynamic = 62.7691 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 4.63719 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.489383 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage 
= 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 40.5378 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 4.05378 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.489383 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.00833162 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00388703 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W 
+ Runtime Dynamic = 0.00444459 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00074646 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000119783 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 7.68795e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000234709 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 8.74932e-05 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 5.44036e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000173191 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold 
Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.455411 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00311043 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + 
Runtime Dynamic = 0.00311043 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0365178 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0162352 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0202826 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold 
Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248934 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 10.1345 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 4.05378 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 3.16424 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + 
Runtime Dynamic = 1.2657 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_11.golden b/unit_test/golden/serialization_test_2/mp_11.golden new file mode 100644 index 0000000..b0515d0 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_11.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.68351 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.77371 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.77371 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.7748 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.186577 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.21334 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.708754 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.404292 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0404412 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.333242 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.7967 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.345134 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.351355 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.671529 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.242448 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.429081 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.483185 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.691442 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 
+ Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_12.golden b/unit_test/golden/serialization_test_2/mp_12.golden new file mode 100644 index 0000000..b104177 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_12.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.5586 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.6488 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.6488 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.196105 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0466444 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0533351 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.385997 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.27235 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 
0.00128629 W + Runtime Dynamic = 0.176224 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.52933 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.131882 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.138103 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.342952 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0835234 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.259429 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.163763 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.157146 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_13.golden b/unit_test/golden/serialization_test_2/mp_13.golden new file mode 100644 index 0000000..bba34e2 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_13.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.06517 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.12402 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.12402 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.89147 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.035102 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.108837 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.124449 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.366647 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.32426 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0351078 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.28739 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.55381 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.10899 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.115211 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.373085 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.147346 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.225739 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.408579 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 
+ Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_14.golden b/unit_test/golden/serialization_test_2/mp_14.golden new file mode 100644 index 0000000..791feb2 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_14.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 12.5563 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.61513 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 
W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.61513 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.20506 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + 
Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04654 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.342059 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.391124 W + + 
Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.10075 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.412824 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0398019 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.720033 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold 
Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.157813 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.315626 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.400525 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.18839 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.740035 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.746256 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.179 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.434056 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.744941 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.896555 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.38288 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with 
power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic 
= 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_15.golden b/unit_test/golden/serialization_test_2/mp_15.golden new file mode 100644 index 0000000..799dbfc --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_15.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.52612 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.61632 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.61632 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.7473 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0247917 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.946113 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.306548 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0341689 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.443236 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.293707 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.373823 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.10585 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0821156 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0883364 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.60497 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.104813 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage 
with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.500157 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.220132 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.251433 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + 
Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_16.golden b/unit_test/golden/serialization_test_2/mp_16.golden new file mode 100644 index 0000000..398ef58 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_16.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.09246 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.08863 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime 
Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.08863 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.0685 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.54527 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00126913 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.54202 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000710927 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00126913 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.171029 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.195562 W + + Renaming Unit: + Area = 0.166931 
mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.680679 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.341324 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0338292 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.005858 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.340648 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 
W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.186912 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.787 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.394901 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.401122 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.669778 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.261684 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.408094 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.445606 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate 
Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248934 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_17.golden b/unit_test/golden/serialization_test_2/mp_17.golden new file mode 100644 index 0000000..2114529 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_17.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.45742 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.51628 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.51628 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.94483 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0125135 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.108837 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.124449 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.453931 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.324585 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0362165 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0213613 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.01805 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.164231 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.170452 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.696636 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.153095 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage 
with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.543541 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.408579 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak 
Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_18.golden b/unit_test/golden/serialization_test_2/mp_18.golden new file mode 100644 index 0000000..b013afc --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_18.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 13.8468 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.937 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.937 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.08428 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + 
Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.342059 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.391124 W + + Renaming Unit: 
+ Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.79152 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.879521 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0852771 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0143825 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.23391 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power 
gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.189306 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.561112 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.507332 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.31992 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.717391 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.723611 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.2125 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.422765 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.789734 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.877765 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.35145 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_19.golden b/unit_test/golden/serialization_test_2/mp_19.golden new file mode 100644 index 0000000..e8d2f46 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_19.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 14.626 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.6536 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.6536 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.75883 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0618617 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.90964 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0092409 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 3.88759 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00356929 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0092409 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.272092 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.311121 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.09152 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.440367 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0445363 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00788765 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.934727 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.259155 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.373823 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.46981 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.627561 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.633782 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.06759 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.408004 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.659582 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.727449 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.10002 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0248487 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 
0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_20.golden b/unit_test/golden/serialization_test_2/mp_20.golden new file mode 100644 index 0000000..24addb3 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_20.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.1792 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.30072 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage 
= 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.30072 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.00407 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 
W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0495835 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.65984 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 3.63601 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00374947 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0544184 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0622243 W + + Renaming 
Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.849796 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.406563 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0482022 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.14873 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with 
power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.05781 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0482741 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0544949 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.904831 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.086168 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.818663 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.182553 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.188575 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_21.golden b/unit_test/golden/serialization_test_2/mp_21.golden new file mode 100644 index 0000000..5b90e0d --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_21.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.7764 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.89784 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000146267 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.89784 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.7401 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0289629 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.240996 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.275565 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.934543 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.748297 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0837793 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0164122 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.845838 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.561456 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.32396 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.307561 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.313781 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.0055 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.345042 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.660458 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.633502 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.942875 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_22.golden b/unit_test/golden/serialization_test_2/mp_22.golden new file mode 100644 index 0000000..25a0727 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_22.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.15589 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.27743 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with 
power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.27743 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.51439 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 
0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.139933 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + 
Runtime Dynamic = 0.160005 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.416954 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.369191 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0396723 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00748172 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.339994 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold 
Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.00609 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.249084 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.255305 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate 
Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.484395 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.205749 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.278646 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.389238 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.534296 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 
0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 
8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_23.golden b/unit_test/golden/serialization_test_2/mp_23.golden new file mode 100644 index 0000000..c5c58cd --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_23.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.4068 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.52834 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.52834 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.0706 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.342478 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.305574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0308428 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00545207 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.522176 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.259155 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.59309 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.479754 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.485975 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.48919 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.254633 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.234557 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.520764 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.7543 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_24.golden b/unit_test/golden/serialization_test_2/mp_24.golden new file mode 100644 index 0000000..89d395d --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_24.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.19405 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.33013 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.33013 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.73358 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0855147 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.097781 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 
W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.122598 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.108138 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.011646 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.148401 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 
0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.32517 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.127652 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.133873 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.276433 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.146985 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.129448 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.257711 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.314292 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 1.87027 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_25.golden b/unit_test/golden/serialization_test_2/mp_25.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_25.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_26.golden b/unit_test/golden/serialization_test_2/mp_26.golden new file mode 100644 index 0000000..416741d --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_26.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.45142 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510276 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510276 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + 
Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 
0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold 
Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate 
Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area 
= 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 
0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_27.golden b/unit_test/golden/serialization_test_2/mp_27.golden new file mode 100644 index 0000000..fe190fc --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_27.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.93034 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.95785 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.95785 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.33681 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.267872 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.234723 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 
W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0270875 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00545207 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.30377 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.106253 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.112473 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.326651 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.10973 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.216921 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.238921 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.282863 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0494049 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area 
= 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_28.golden b/unit_test/golden/serialization_test_2/mp_28.golden new file mode 100644 index 0000000..f9c2857 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_28.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.12798 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.24952 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime 
Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.24952 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.65358 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 
0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.101063 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.115559 W + + Renaming Unit: + Area = 0.166931 mm^2 + 
Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.165418 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.144537 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0168497 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00342243 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0526043 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + 
Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.37791 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0910737 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0972945 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + 
Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.265321 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.14469 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.12063 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.29529 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.37715 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_29.golden b/unit_test/golden/serialization_test_2/mp_29.golden new file mode 100644 index 0000000..f02b37d --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_29.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.05266 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.04884 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.04884 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.28339 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0932887 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.10667 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.210208 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.188495 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0180881 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0526043 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.47755 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.182147 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.188368 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.324103 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.164145 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.159958 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.2765 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345721 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0250844 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak 
Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_30.golden b/unit_test/golden/serialization_test_2/mp_30.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_30.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 
W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W 
+ Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_31.golden b/unit_test/golden/serialization_test_2/mp_31.golden new file mode 100644 index 0000000..2c4021e --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_31.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.41989 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating 
= 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 
1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + 
Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + 
Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_32.golden b/unit_test/golden/serialization_test_2/mp_32.golden new file mode 100644 index 0000000..6bab89c --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_32.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 4.02029 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.0478 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device 
Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.0478 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0939441 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power 
gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0310962 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0355567 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with 
power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.130085 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.11667 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0110067 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00179871 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0526043 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + 
Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.721761 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + 
Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.152212 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0499219 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.10229 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.126184 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0942875 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0494049 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_33.golden b/unit_test/golden/serialization_test_2/mp_33.golden new file mode 100644 index 0000000..514b64e --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_33.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 13.8641 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.15429 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 11.5528 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.15429 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.458733 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0855147 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.097781 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.135602 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.117644 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 
0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0143328 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.148401 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + 
Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.41126 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.250577 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.256798 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.239603 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.137313 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.10229 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.257711 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.314292 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000292535 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 11.5528 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.33748 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 4.78523 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 5.43004 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0872393 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak 
Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0692327 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000233495 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_34.golden b/unit_test/golden/serialization_test_2/mp_34.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_34.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 
W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W 
+ Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_35.golden b/unit_test/golden/serialization_test_2/mp_35.golden new file mode 100644 index 0000000..74b75c5 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_35.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38893 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime 
Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating 
= 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 
1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 
0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold 
Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_36.golden b/unit_test/golden/serialization_test_2/mp_36.golden new file mode 100644 index 0000000..4043d62 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_36.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.31132 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance 
device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate 
Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W 
+ Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W 
+ Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 
W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area 
= 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 
W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_37.golden b/unit_test/golden/serialization_test_2/mp_37.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_37.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_38.golden b/unit_test/golden/serialization_test_2/mp_38.golden new file mode 100644 index 0000000..500c722 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_38.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.23409 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + 
Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 
0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold 
Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate 
Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 
0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W 
+ Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_39.golden b/unit_test/golden/serialization_test_2/mp_39.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_39.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_40.golden b/unit_test/golden/serialization_test_2/mp_40.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_40.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_41.golden b/unit_test/golden/serialization_test_2/mp_41.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_41.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_42.golden b/unit_test/golden/serialization_test_2/mp_42.golden new file mode 100644 index 0000000..a152baa --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_42.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_43.golden b/unit_test/golden/serialization_test_2/mp_43.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_43.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_44.golden b/unit_test/golden/serialization_test_2/mp_44.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_44.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_45.golden b/unit_test/golden/serialization_test_2/mp_45.golden new file mode 100644 index 0000000..a152baa --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_45.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_46.golden b/unit_test/golden/serialization_test_2/mp_46.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_46.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_47.golden b/unit_test/golden/serialization_test_2/mp_47.golden new file mode 100644 index 0000000..a152baa --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_47.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_48.golden b/unit_test/golden/serialization_test_2/mp_48.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_48.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_49.golden b/unit_test/golden/serialization_test_2/mp_49.golden new file mode 100644 index 0000000..a152baa --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_49.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.82957 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 
0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 
W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + 
Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold 
Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 
0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_2/mp_50.golden b/unit_test/golden/serialization_test_2/mp_50.golden new file mode 100644 index 0000000..331c462 --- /dev/null +++ b/unit_test/golden/serialization_test_2/mp_50.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 3.38855 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime 
Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + 
Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 
0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 
6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + 
Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold 
Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_01.golden b/unit_test/golden/serialization_test_3/mp_01.golden new file mode 100644 index 0000000..7022576 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_01.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 5.57103 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.62988 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 2.62988 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.270887 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 
W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.108837 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.124449 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.429958 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.378696 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + 
Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0423591 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00829358 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.296802 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold 
Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.63209 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.124915 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.131136 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.435435 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.147972 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 
0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.287464 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.408579 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 
W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_02.golden b/unit_test/golden/serialization_test_3/mp_02.golden new file mode 100644 index 0000000..ae2a0ba --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_02.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.55677 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.67831 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W 
+ + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.67831 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.79113 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.38768 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 3.36816 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00303365 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.101063 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.115559 W + + Renaming Unit: + Area = 0.166931 mm^2 + 
Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.399522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.162574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0188973 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00382836 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.430633 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + 
Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.219416 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.92352 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.104262 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.110483 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.634344 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.148471 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.485873 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.2765 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345721 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate 
Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_03.golden b/unit_test/golden/serialization_test_3/mp_03.golden new file mode 100644 index 0000000..d58e9b0 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_03.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 14.4373 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.4962 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.4962 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.46747 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.38681 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 3.36816 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00303365 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.412025 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.471127 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.898557 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.560366 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W 
+ Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0672693 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0139766 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.552479 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.39277 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.705945 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.712166 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.29978 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.574954 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.72483 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.08445 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.69718 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 
0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_04.golden b/unit_test/golden/serialization_test_3/mp_04.golden new file mode 100644 index 0000000..3ba03c5 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_04.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.09671 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.21825 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage 
= 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.21825 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.6437 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W 
+ Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.38681 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 3.36816 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00303365 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming 
Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.49387 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.055 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00882945 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.106807 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with 
power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.70686 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.860739 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0434854 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.817253 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate 
Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_05.golden b/unit_test/golden/serialization_test_3/mp_05.golden new file mode 100644 index 0000000..77bcc6e --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_05.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.3287 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.40168 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.40168 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.39308 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.147707 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.168894 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.472355 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.298664 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0370256 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00788765 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.180443 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.25071 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.13039 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.13661 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.719541 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.214623 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.504918 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.389238 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.534296 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.024995 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.156705 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0872393 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0692327 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000233495 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_06.golden b/unit_test/golden/serialization_test_3/mp_06.golden new file mode 100644 index 0000000..4043d62 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_06.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.31132 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage 
= 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.510085 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0166632 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W 
+ Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00149292 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold 
Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.491929 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + 
Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0730356 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0405653 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold 
Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime 
Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_07.golden b/unit_test/golden/serialization_test_3/mp_07.golden new file mode 100644 index 0000000..f4bf697 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_07.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 62.5942 W + Total Leakage = 8.05706 W + Peak Dynamic = 54.5372 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 4.10869 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.529939 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.11342e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage 
with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0109648 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 32.3058 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 3.56778 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.529939 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0176044 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00712848 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00430633 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00492404 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0184442 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0120452 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 4.48277e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 
0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00119273 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000206291 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 3.17222e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 1.00986e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0177459 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.00423108 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.00408974 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.00817948 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.00311395 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.473025 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.00750857 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00768994 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000181366 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0152874 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.00556447 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.00972289 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.079676 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0164934 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 5.56708e-06 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.11342e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 8.07646 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 3.56778 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.641143 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.27657 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 1.10625 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.650067 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0109648 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0109648 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.00610421 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.00484427 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 1.63379e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_08.golden b/unit_test/golden/serialization_test_3/mp_08.golden new file mode 100644 index 0000000..85b8033 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_08.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.03469 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.09355 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage 
= 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.09355 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.217467 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime 
Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0466444 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + 
Runtime Dynamic = 0.0533351 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.471442 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.463798 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + 
Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.293707 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.293718 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.64693 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.171945 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.178166 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 
0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.335052 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0884974 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.246555 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.163763 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.157146 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + 
Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating 
= 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_09.golden b/unit_test/golden/serialization_test_3/mp_09.golden new file mode 100644 index 0000000..2908e1d --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_09.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 11.4799 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.50741 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.50741 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.27928 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.072172 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.65984 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 3.63601 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00374947 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.0100407 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.589586 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.333117 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0355772 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.207941 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.24769 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.402366 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.408587 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.706254 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.290047 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.416207 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.633502 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.942875 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0494049 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_10.golden b/unit_test/golden/serialization_test_3/mp_10.golden new file mode 100644 index 0000000..03e2ed4 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_10.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 16.2725 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.5168 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.5168 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.93886 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + 
Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage 
with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.909135 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.351154 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0376247 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.607152 W + + Data Cache: + 
Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.215962 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.32042 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.74109 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.454124 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.460345 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 
W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.899058 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.29785 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.601209 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.614712 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.911446 W + + L2 + Area = 
0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage 
= 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0940233 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0523436 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0415396 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000140097 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_11.golden b/unit_test/golden/serialization_test_3/mp_11.golden new file mode 100644 index 0000000..15c8e4d --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_11.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 12.0385 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.18855 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000572853 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.18855 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.14659 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.772634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.385997 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W 
+ Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.096126 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.29453 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0462832 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0525041 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.394628 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0572483 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.33738 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0886051 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0314292 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.024995 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000572853 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.188047 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.104687 W + + Crossbar: + Area = 0.0102137 
mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0830792 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000280194 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_12.golden b/unit_test/golden/serialization_test_3/mp_12.golden new file mode 100644 index 0000000..b5858c7 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_12.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 65.2393 W + Total Leakage = 8.05706 W + Peak Dynamic = 57.1823 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 4.45057 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.448096 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 
2.29937e-05 W + Runtime Dynamic = 1.60015e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0227617 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 34.951 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 3.97969 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.448096 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.0164081 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.000762322 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak 
Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.000575993 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.0146043 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 4.00667e-05 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.0145092 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 1.4962e-05 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 4.00667e-05 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.000217153 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime 
Dynamic = 0.000248301 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.00670461 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.00601644 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 4.29495e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000575561 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 9.55985e-05 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 3.03931e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 9.6755e-06 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.422904 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000173767 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.000173767 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.00401056 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.000906993 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.00310357 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold 
Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00207946 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 1.60015e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 8.73774 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 3.97969 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.653454 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.32092 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 1.76753 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 
8.4316e-06 W + Runtime Dynamic = 1.00532 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0227617 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0227617 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0126716 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0100561 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 3.39155e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_13.golden b/unit_test/golden/serialization_test_3/mp_13.golden new file mode 100644 index 0000000..cd39210 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_13.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.4264 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.54798 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.54798 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.95565 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.202126 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.231119 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.74954 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.367569 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0341287 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00545207 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.561746 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.06743 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.285165 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.291385 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.866558 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.278929 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.58763 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.501975 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.722871 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_14.golden b/unit_test/golden/serialization_test_3/mp_14.golden new file mode 100644 index 0000000..4ad34d5 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_14.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.7587 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.88023 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.88023 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.509135 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W 
+ Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.772554 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.640723 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0737115 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0147885 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.851345 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.41983 W + + 
LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.7205 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.320251 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.326471 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + 
Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.733381 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.252631 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.480751 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.520764 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.7543 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_15.golden b/unit_test/golden/serialization_test_3/mp_15.golden new file mode 100644 index 0000000..282318c --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_15.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 15.89 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 13.0115 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating 
= 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 13.0115 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.82829 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.072172 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W 
+ Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.427573 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.488905 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.42285 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.656813 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 
0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0691068 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0127588 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.11063 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.361088 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.192882 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.385765 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.427227 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 
W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 6.22251 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.847532 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.853753 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.56071 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.51614 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power 
gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 1.04457 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.08445 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.69718 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 
0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold 
Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_16.golden b/unit_test/golden/serialization_test_3/mp_16.golden new file mode 100644 index 0000000..3a59f23 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_16.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.76266 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.8842 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + 
Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.8842 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.32082 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power 
gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.132159 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.151116 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold 
Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.486611 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0901012 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00959842 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00179871 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.408477 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime 
Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.42798 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.299597 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.305818 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak 
Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.521486 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.253067 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.268419 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.370448 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.502867 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic 
= 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_17.golden b/unit_test/golden/serialization_test_3/mp_17.golden new file mode 100644 index 0000000..c80fa6e --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_17.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.317 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.43855 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.43855 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.22778 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0454123 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.240996 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.275565 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.927122 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.681436 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0638628 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0103232 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.460282 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.71655 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.354838 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.361059 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.31559 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.286708 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 1.02888 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.614712 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.911446 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_18.golden b/unit_test/golden/serialization_test_3/mp_18.golden new file mode 100644 index 0000000..2109fd7 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_18.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.3942 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.51573 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.51573 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.12343 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak 
Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.35138 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.198 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.020775 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00382836 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.137251 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate 
Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.82357 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.122178 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.128399 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + 
Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.752581 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.158952 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.593629 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.220132 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.251433 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_19.golden b/unit_test/golden/serialization_test_3/mp_19.golden new file mode 100644 index 0000000..785d608 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_19.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 11.0648 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.15503 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.15503 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.02705 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.0681 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 2.06145 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00124657 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.310962 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.355567 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.744805 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.657787 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0724329 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0139766 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.445203 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.93783 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.560376 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.566597 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.899589 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.465575 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.434015 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.840186 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.2886 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** 
+ Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 
0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_20.golden b/unit_test/golden/serialization_test_3/mp_20.golden new file mode 100644 index 0000000..8df7fed --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_20.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 15.5517 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.87321 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage 
= 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 11.5528 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.87321 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.7995 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak 
Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.035102 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 
0.0177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.423822 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.188495 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0180881 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.112145 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + 
Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.40409 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.721361 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0390219 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.682339 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0886051 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0314292 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold 
Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000146267 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 11.5528 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.33748 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 4.78523 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 
8.4316e-06 W + Runtime Dynamic = 5.43004 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_21.golden b/unit_test/golden/serialization_test_3/mp_21.golden new file mode 100644 index 0000000..b5e7745 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_21.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 13.1143 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.2358 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.2358 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.31285 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.388703 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.444459 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.12835 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.861159 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0821208 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0135707 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.750947 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.192882 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.385765 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.93685 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.643238 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.649459 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.24311 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.551439 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.691673 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.990503 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.54003 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_22.golden b/unit_test/golden/serialization_test_3/mp_22.golden new file mode 100644 index 0000000..0c567c1 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_22.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.89626 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.01741 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W 
+ + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.01741 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.853266 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0144815 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0466444 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0533351 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak 
Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.456697 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0268088 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00187768 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.313949 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + 
Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12648 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.104262 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.110483 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.925008 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0753024 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.849706 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.163763 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.157146 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + 
Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_23.golden b/unit_test/golden/serialization_test_3/mp_23.golden new file mode 100644 index 0000000..c797630 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_23.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 12.1602 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.359 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating 
= 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.359 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.2912 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0247917 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.81917 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.163255 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.186673 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.79656 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557446 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.057291 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0103232 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.348937 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.8155 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.204294 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.210514 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.01705 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.250416 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.766633 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.445606 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + 
Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_24.golden b/unit_test/golden/serialization_test_3/mp_24.golden new file mode 100644 index 0000000..59379eb --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_24.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 11.1508 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.2723 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.2723 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.32189 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0618617 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.266675 W + + Renaming Unit: + Area = 
0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.01257 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.681436 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0638628 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0103232 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.661142 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 
0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.176224 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.1165 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.324481 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.330701 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + 
Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.760881 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.264609 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.496272 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.577133 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.848588 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 
0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_25.golden b/unit_test/golden/serialization_test_3/mp_25.golden new file mode 100644 index 0000000..ebfde1e --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_25.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 14.1445 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.42049 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.42049 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.64735 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0247917 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34201 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.500463 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.332468 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 
0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0333598 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.005858 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.468013 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.278158 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 
W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.72456 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0890828 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0953037 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.686666 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.153353 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.533314 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.220132 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.251433 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + 
Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_26.golden b/unit_test/golden/serialization_test_3/mp_26.golden new file mode 100644 index 0000000..b7d0104 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_26.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 13.6504 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.7719 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.7719 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 4.29191 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 
0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0618617 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 3.38681 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 3.36816 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00303365 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00780651 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.295414 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 
0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.337789 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.09355 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.513489 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0560526 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0107292 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.653798 W + + Data Cache: + Area = 1.50084 mm^2 + 
Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.262609 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0876738 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.32042 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.41225 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.576302 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.582523 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold 
Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.04629 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.412456 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.633833 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.765028 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.16288 W + + L2 + Area = 0.0189192 mm^2 + Peak 
Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with 
power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_27.golden b/unit_test/golden/serialization_test_3/mp_27.golden new file mode 100644 index 0000000..3f35467 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_27.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.3042 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.42574 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.42574 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.61597 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0454123 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.69759 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.395112 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0388631 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.351474 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.60049 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.184885 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.191105 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.736001 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.191756 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.544246 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.445606 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_28.golden b/unit_test/golden/serialization_test_3/mp_28.golden new file mode 100644 index 0000000..3f86904 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_28.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.04319 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.16473 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.16473 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.76782 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0247917 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.124385 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.142227 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic 
= 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.438795 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.205559 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0168096 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.211216 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage 
= 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.61339 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.285165 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.291385 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.942438 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.226769 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.715668 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.351659 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.471438 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_29.golden b/unit_test/golden/serialization_test_3/mp_29.golden new file mode 100644 index 0000000..065729a --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_29.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 11.1563 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.27784 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.27784 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.03117 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.264318 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.302232 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.990857 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.852303 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0816514 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0135707 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.795878 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.259155 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.43323 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.488712 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.494933 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.875903 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.389686 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.486217 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.671081 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.00573 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_30.golden b/unit_test/golden/serialization_test_3/mp_30.golden new file mode 100644 index 0000000..ac72950 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_30.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.62053 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.74206 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.74206 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.91544 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0544184 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0622243 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak 
Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.89636 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.385282 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0350676 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00545207 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.494717 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate 
Leakage = 0.00128629 W + Runtime Dynamic = 0.219416 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.293718 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.14183 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.082862 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0890828 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction 
Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.868814 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0806605 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.788153 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.182553 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.188575 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + 
Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_31.golden b/unit_test/golden/serialization_test_3/mp_31.golden new file mode 100644 index 0000000..4e24413 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_31.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 14.6694 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.791 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.791 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.98703 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.450895 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.515573 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.16477 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.692888 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0732019 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0135707 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.48106 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.490666 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.298091 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.596182 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.91777 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.875899 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.88212 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.42623 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.620093 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.806132 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.12203 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.76003 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_32.golden b/unit_test/golden/serialization_test_3/mp_32.golden new file mode 100644 index 0000000..2e0f2b1 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_32.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 6.23869 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.36022 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.36022 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.21823 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak 
Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.364189 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.134708 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0130542 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.170886 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate 
Leakage = 0.00128629 W + Runtime Dynamic = 0.117483 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.47341 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.840906 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0324703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.808436 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_33.golden b/unit_test/golden/serialization_test_3/mp_33.golden new file mode 100644 index 0000000..e5da613 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_33.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 15.5372 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.2177 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 10.2177 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.13365 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.443121 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.506683 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.26781 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 1.00147 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.114662 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0229071 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 1.17892 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.463022 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.227952 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.455904 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.55719 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.749491 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.755712 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.29774 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.546453 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.751283 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.1784 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.85432 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W 
+ Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_34.golden b/unit_test/golden/serialization_test_3/mp_34.golden new file mode 100644 index 0000000..1125afa --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_34.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.4684 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.58994 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.58994 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.02291 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.343274 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.144187 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 
0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.86596 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.119441 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.125662 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.985847 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.125914 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.859933 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime 
Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_35.golden b/unit_test/golden/serialization_test_3/mp_35.golden new file mode 100644 index 0000000..911e597 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_35.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.9744 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.65488 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.65488 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.65037 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0289629 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.901505 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.808021 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0793043 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0135707 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.355543 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.145126 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold 
Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.74747 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.244854 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.251075 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.778032 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.262011 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.516022 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.558343 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.817159 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W 
+ Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_36.golden b/unit_test/golden/serialization_test_3/mp_36.golden new file mode 100644 index 0000000..5f599a0 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_36.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.3353 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.45685 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First 
Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.45685 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.49685 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0186526 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.202126 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.231119 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.706165 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.207505 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0234618 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00464021 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.572102 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W 
+ Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.122743 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.245487 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.293718 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.38802 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.48473 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.490951 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.758977 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.346826 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.41215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.78573 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_37.golden b/unit_test/golden/serialization_test_3/mp_37.golden new file mode 100644 index 0000000..880a98c --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_37.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 13.1241 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.80465 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.80465 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.70981 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0392732 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.202126 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.231119 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.01257 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.681436 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0638628 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0103232 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.438451 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.48361 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.306564 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.312785 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.34678 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.249475 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 1.09731 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.501975 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.722871 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W 
+ Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_38.golden b/unit_test/golden/serialization_test_3/mp_38.golden new file mode 100644 index 0000000..44e5a2a --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_38.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 15.0415 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 12.163 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 12.163 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.91132 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86571 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.00173993 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.310962 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.355567 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W 
+ Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.45172 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.414446 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0453455 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00869951 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.882631 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 
W + Runtime Dynamic = 0.32135 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.614138 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.30319 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.632289 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.63851 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.26064 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.431044 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.829595 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.821397 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.25717 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_39.golden b/unit_test/golden/serialization_test_3/mp_39.golden new file mode 100644 index 0000000..d310945 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_39.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 12.1269 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.80742 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.80742 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.61015 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0289629 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.147707 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.168894 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.699429 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.395436 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0399718 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.740978 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.466476 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.59665 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.24983 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.256051 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.767658 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.196254 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.571404 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.565725 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W 
+ Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_40.golden b/unit_test/golden/serialization_test_3/mp_40.golden new file mode 100644 index 0000000..5046736 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_40.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 9.8153 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.93684 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.93684 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.41199 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0144815 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.202126 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.231119 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.723728 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.421356 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0391627 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00626393 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.620633 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W 
+ Runtime Dynamic = 0.188319 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.122743 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.245487 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.16021 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.02028 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.373501 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.379722 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.716086 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.299174 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.416912 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.78573 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_41.golden b/unit_test/golden/serialization_test_3/mp_41.golden new file mode 100644 index 0000000..b6b0b78 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_41.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.3357 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.9849 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.9849 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.95065 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0289629 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.116611 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.133338 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.631122 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.492856 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0451354 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.169762 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.17996 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0938109 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.100032 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.878752 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.132574 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.746178 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.332869 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.440009 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 
mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_42.golden b/unit_test/golden/serialization_test_3/mp_42.golden new file mode 100644 index 0000000..f69b8c9 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_42.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 14.8992 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 12.0208 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 
2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 12.0208 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.74567 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W 
+ Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.56937 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.404251 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.462238 W + + 
Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.58406 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.597089 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0735818 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0156003 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.753199 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold 
Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.160675 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.122743 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.245487 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.560735 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 6.3771 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.923675 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.929896 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.52599 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.537671 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.988315 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.04687 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.63432 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage 
with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 
0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_43.golden b/unit_test/golden/serialization_test_3/mp_43.golden new file mode 100644 index 0000000..67c69e2 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_43.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 83.4832 W + Total Leakage = 8.05706 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.5765 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.22571 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.22571 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.34566 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.0681 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 2.06145 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00124657 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00270352 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0388703 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0444459 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.669645 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.339378 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.027177 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00342243 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.421076 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.293707 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.186912 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.60242 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0855992 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0918201 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.597996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.099954 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 
W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.498042 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.144974 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.125717 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 
mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_44.golden b/unit_test/golden/serialization_test_3/mp_44.golden new file mode 100644 index 0000000..a977a60 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_44.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.9133 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.03484 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 
2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.03484 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.81925 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W 
+ Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.177784 W + + Renaming Unit: 
+ Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.450156 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.287537 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0287953 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00504614 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.189855 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with 
power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.49548 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.241619 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.247839 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.80286 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.2274 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.57546 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.565725 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_45.golden b/unit_test/golden/serialization_test_3/mp_45.golden new file mode 100644 index 0000000..6b4e145 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_45.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.5543 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64448 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64448 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.19157 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0515514 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.2099 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.240008 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.973455 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.746675 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0444099 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0782358 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0143825 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00369946 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.509757 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.210417 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.91629 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.316767 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.322988 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.7895 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.273479 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.516022 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.558343 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.817159 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 
+ Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_46.golden b/unit_test/golden/serialization_test_3/mp_46.golden new file mode 100644 index 0000000..7671a31 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_46.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 8.13506 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.25659 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W 
+ Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.25659 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.11446 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0412411 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86397 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0621925 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0711135 W + + Renaming Unit: + Area = 
0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.126081 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0359896 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0034558 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000580916 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.12657 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power 
gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.83608 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0358324 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0420532 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W 
+ + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.863238 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.14033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0229329 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.699975 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.163763 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.282353 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_47.golden b/unit_test/golden/serialization_test_3/mp_47.golden new file mode 100644 index 0000000..90b2672 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_47.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.967 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.0572 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating 
= 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.0572 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.74228 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.835898 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.665994 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0706849 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.169762 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.25585 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.219472 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.225693 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.628106 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.247703 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.380403 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.565725 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 
0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_48.golden b/unit_test/golden/serialization_test_3/mp_48.golden new file mode 100644 index 0000000..e4b3afc --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_48.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.72299 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.84452 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage 
= 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.84452 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.82483 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 
W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0454123 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0466444 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0533351 W + + Renaming 
Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.455999 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.368542 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0374548 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.106477 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage 
with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.43052 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0338415 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0400624 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime 
Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.683967 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0679415 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.616025 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.163763 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.157146 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage 
with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 
0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_49.golden b/unit_test/golden/serialization_test_3/mp_49.golden new file mode 100644 index 0000000..78908d3 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_49.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 10.1856 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.27579 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.27579 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.96308 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0247917 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.225448 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.257786 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.558126 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.378696 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0423591 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00829358 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.625646 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.140278 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.280556 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.04884 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.361557 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.367778 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.734095 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.352987 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.381108 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.595922 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.880017 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 
0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_3/mp_50.golden b/unit_test/golden/serialization_test_3/mp_50.golden new file mode 100644 index 0000000..3d1ce89 --- /dev/null +++ b/unit_test/golden/serialization_test_3/mp_50.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 17.0068 mm^2 + Peak Power = 59.073 W + Total Leakage = 8.05706 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 8.04524 W + Subthreshold Leakage with power gating = 2.99635 W + Gate Leakage = 0.0118216 W + Runtime Dynamic = 7.77956 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.90109 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage 
= 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 2.34271 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 0.615508 W + Subthreshold Leakage with power gating = 0.157502 W + Gate Leakage = 0.000548074 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.90109 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.41803 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 
W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0144815 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.04567 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.03886 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00107128 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00286878 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.108837 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.124449 W + + Renaming 
Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.807725 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.566627 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0588691 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0107292 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.200536 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage 
with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.368 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.11322 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.119441 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.01215 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.124353 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.887796 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.408579 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 0.585677 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.153877 W + Subthreshold Leakage with power gating = 0.0393754 W + Gate Leakage = 0.000137019 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 0.306325 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.0568985 W + Subthreshold Leakage with power gating = 0.0136556 W + Gate Leakage = 5.25099e-05 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 0.0491871 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.0091363 W + Subthreshold Leakage with power gating = 0.00219271 W + Gate Leakage = 8.4316e-06 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_01.golden b/unit_test/golden/serialization_test_4/mp_01.golden new file mode 100644 index 0000000..6a8a9dd --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_01.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 64.1004 W + Total Leakage = 10.5679 W + Peak Dynamic = 53.5325 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 5.02189 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.08058 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 7.74125e-05 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with 
power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.029647 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 31.3012 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 3.91159 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.08058 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.230238 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0367295 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00139328 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.141307 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000387672 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 0.140386 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000144767 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000387672 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.021011 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0240248 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.113419 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0800302 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.00120599 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 
0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.00769611 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00128532 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 1.47037e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 9.3617e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0413168 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0118478 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0236956 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0144333 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W 
+ Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.67779 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0332228 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0340634 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000840656 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.0804273 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0251842 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0552431 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.11552 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0764494 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0033777 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 7.74125e-05 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.8253 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 3.91159 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.67825 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.41025 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.855088 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.823086 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.029647 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.029647 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage 
= 1.19331e-05 W + Runtime Dynamic = 0.0165047 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0130981 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.41748e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_02.golden b/unit_test/golden/serialization_test_4/mp_02.golden new file mode 100644 index 0000000..231c36d --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_02.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 15.0212 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.77873 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 
W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 8.21091 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.77873 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.25785 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 
0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.81917 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.124385 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating 
= 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.142227 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.56457 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.0815692 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0102377 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.612199 W + + Data Cache: + Area = 1.50084 mm^2 
+ Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.336899 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.293718 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.05039 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.296114 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.293376 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold 
Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.849438 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.194522 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0388901 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.616025 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.658994 W + + L2 + Area = 0.0189192 mm^2 
+ Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000190951 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 8.21091 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold 
Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 4.31128 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_03.golden b/unit_test/golden/serialization_test_4/mp_03.golden new file mode 100644 index 0000000..f8d6e9c --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_03.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.50015 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.62169 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.62169 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.39034 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.244507 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.522834 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0.51943 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000535638 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00143439 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.240996 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.275565 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.695118 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.2695 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.00892435 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0267477 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00464021 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000692766 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.463943 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.262609 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.83197 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.412071 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0303579 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.748174 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.27423 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0333977 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.440546 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.558343 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.378552 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.19278 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_04.golden b/unit_test/golden/serialization_test_4/mp_04.golden new file mode 100644 index 0000000..705c5b0 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_04.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 7.17635 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.85687 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage 
with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 1.85687 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.102109 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 
0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming 
Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.343274 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.144187 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold 
Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.05368 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0338415 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0400624 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 
0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.259168 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0424183 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.216749 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power 
gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 2.5314 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_05.golden b/unit_test/golden/serialization_test_4/mp_05.golden new file mode 100644 index 0000000..c47ae9e --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_05.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 65.4858 W + Total Leakage = 10.5679 W + Peak Dynamic = 54.9179 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 3.90749 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.613051 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.025787 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 32.6865 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 3.26865 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.613051 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.169541 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.00172729 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0013051 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.165454 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00045392 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate 
Leakage = 0.00127034 W + Runtime Dynamic = 0.164377 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000169506 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00045392 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.000492029 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.000562607 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.010827 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.00966739 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 9.73159e-06 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 
0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000955808 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000165226 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 6.88653e-06 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 2.1923e-05 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.427981 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000393725 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.000393725 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.00908722 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.00205508 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.00703213 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.00470243 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 8.17163 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 3.26865 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 1.20142 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.48057 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.025787 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.025787 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0143558 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 
0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0113927 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 3.84233e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_06.golden b/unit_test/golden/serialization_test_4/mp_06.golden new file mode 100644 index 0000000..0143976 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_06.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.528 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.64953 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime 
Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.64953 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.08154 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 
0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34201 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.279866 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.320011 W + + Renaming Unit: + Area = 
0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.935454 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.520399 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0498698 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00829358 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.232578 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 
0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.186912 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.21305 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.449396 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.455617 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + 
Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.18761 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.383054 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.804551 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.765028 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.16288 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 
0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_07.golden b/unit_test/golden/serialization_test_4/mp_07.golden new file mode 100644 index 0000000..d8889e2 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_07.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.63967 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.76121 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.76121 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.70349 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.217674 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.248897 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.753512 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.477414 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0519575 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.260546 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.91015 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.261526 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.267747 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.760647 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.260318 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.500328 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.78573 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 
W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_08.golden b/unit_test/golden/serialization_test_4/mp_08.golden new file mode 100644 index 0000000..06ab182 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_08.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.3106 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.43214 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First 
Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.43214 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.4585 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86397 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.186577 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.21334 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.821607 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.377399 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0379243 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00666986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.190654 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.74777 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.428742 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.434963 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + 
Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.1406 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.277765 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0651643 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.797676 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.464396 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.910428 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_09.golden b/unit_test/golden/serialization_test_4/mp_09.golden new file mode 100644 index 0000000..9c5171d --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_09.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.8464 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.96797 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.96797 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.211 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.986152 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.710925 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0752494 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0139766 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.233047 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.45766 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.378976 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.385196 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.762345 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.264191 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0181083 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.480046 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.595922 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.13043 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_10.golden b/unit_test/golden/serialization_test_4/mp_10.golden new file mode 100644 index 0000000..0753283 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_10.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 13.7121 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.925 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with 
power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 8.66133 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.925 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.83281 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.353671 W + + Branch Target 
Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0544184 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 
0.000402065 W + Runtime Dynamic = 0.0622243 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.219208 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.117319 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0132241 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00261057 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0213613 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + 
Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.79788 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.158757 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.147061 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate 
Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.783254 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.151506 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0524027 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.579345 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.220004 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold 
Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000337218 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 8.66133 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 1.09598 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 3.91519 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 
W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 3.65015 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_11.golden b/unit_test/golden/serialization_test_4/mp_11.golden new file mode 100644 index 0000000..beeebe6 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_11.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.5749 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.69642 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.69642 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.28245 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.299089 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.279866 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.320011 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.94714 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.682409 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold 
Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.067189 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.011541 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.233047 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.15367 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.537732 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.526037 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.948067 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.383226 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0705111 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.49433 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.727449 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.35044 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_12.golden b/unit_test/golden/serialization_test_4/mp_12.golden new file mode 100644 index 0000000..24492d3 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_12.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.59457 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.69893 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + + Total 
First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.69893 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.7487 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0310962 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0355567 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak 
Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.431602 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.306223 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0330602 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00626393 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0213613 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate 
Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.41901 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0365787 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0427996 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 
0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.72744 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0565654 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.670875 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.126184 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0942875 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0.0248487 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 1.87027 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.125364 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0697915 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0553861 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0.000186796 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_13.golden b/unit_test/golden/serialization_test_4/mp_13.golden new file mode 100644 index 0000000..9d9c213 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_13.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 14.4609 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.6597 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.76989 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.6597 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.05359 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.771724 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.448899 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.043897 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00748172 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.254409 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.44647 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.501153 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.489458 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.34265 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.359131 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0705111 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.913012 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.671081 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.25615 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.76989 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.854481 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 3.04515 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 1.87027 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_14.golden b/unit_test/golden/serialization_test_4/mp_14.golden new file mode 100644 index 0000000..d6b68e9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_14.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.51147 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.633 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.633 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.10462 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 
W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.171029 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 
W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.195562 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.820694 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.611558 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0444099 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0634336 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.011541 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00369946 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.222367 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 
1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.43192 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.271977 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.278197 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with 
power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.601012 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.204383 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.396629 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.464396 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.660013 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 
0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 
0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_15.golden b/unit_test/golden/serialization_test_4/mp_15.golden new file mode 100644 index 0000000..3441899 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_15.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.94324 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.00209 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.00209 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.7426 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.116611 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.133338 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.299025 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.143888 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.00892435 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0146323 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00261057 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000692766 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.032042 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.82362 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.265756 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.25406 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.944586 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.211314 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0705111 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.662762 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.332869 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.690423 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak 
Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_16.golden b/unit_test/golden/serialization_test_4/mp_16.golden new file mode 100644 index 0000000..6a633c9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_16.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.9614 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.08291 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + 
Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.08291 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.58807 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold 
Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86397 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.279866 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.320011 W + + Renaming Unit: + Area = 
0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.03278 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.755207 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0775965 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0139766 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.233047 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 
0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.14891 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.537732 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.526037 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + 
Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.943306 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.383226 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0705111 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.489568 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.727449 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.35044 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 
0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_17.golden b/unit_test/golden/serialization_test_4/mp_17.golden new file mode 100644 index 0000000..3e6fa4f --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_17.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.78519 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.40303 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 3.40303 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.76964 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.28068 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.171431 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0193667 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00382836 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0213613 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.25324 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.648464 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0433704 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with 
power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.605093 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.107395 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.0247025 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory 
+ Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_18.golden b/unit_test/golden/serialization_test_4/mp_18.golden new file mode 100644 index 0000000..c528463 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_18.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.0943 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.21583 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + 
Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.21583 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.04889 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + 
Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.264318 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.302232 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 
2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.8372 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.583691 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0575905 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.233047 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 
0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.01658 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.537732 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.526037 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area 
= 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.911418 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.372326 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0705111 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.468581 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.68987 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.322184 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.28758 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 
2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_19.golden b/unit_test/golden/serialization_test_4/mp_19.golden new file mode 100644 index 0000000..70d396c --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_19.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.6908 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.37131 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.37131 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.48629 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.109164 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0144815 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.116611 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.133338 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.558224 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.415095 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0475629 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00951137 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0739656 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.19942 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.112473 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.118694 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.879546 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.134777 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W 
+ Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.744769 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.332869 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.440009 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_20.golden b/unit_test/golden/serialization_test_4/mp_20.golden new file mode 100644 index 0000000..1c02c31 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_20.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.3233 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.44488 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.44488 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.45449 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.894049 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.367894 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0352374 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.005858 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.255208 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.57412 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.456363 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.462584 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.06638 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.306627 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.759757 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.520764 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.7543 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_21.golden b/unit_test/golden/serialization_test_4/mp_21.golden new file mode 100644 index 0000000..02bf3b5 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_21.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.89069 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.57121 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.57121 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.01613 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.136456 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0125135 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.5685 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.55829 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00160691 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00430316 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.124385 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.142227 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.464868 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.290132 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0376649 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00829358 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.032042 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 
0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.97807 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.240872 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.247093 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.436854 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.168436 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage 
with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.268419 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.351659 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.471438 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + 
Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 
0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_22.golden b/unit_test/golden/serialization_test_4/mp_22.golden new file mode 100644 index 0000000..b6e10d0 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_22.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.05632 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.17786 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device 
Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.17786 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.35788 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.136456 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 
0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.00834236 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.8183 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.80987 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00142674 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00350334 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.124385 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.142227 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 
0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.763731 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.217335 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0272573 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.005858 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.128168 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W 
+ + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.32042 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.60766 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.229177 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.235398 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold 
Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.693636 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.182015 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.511621 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.351659 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.471438 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_23.golden b/unit_test/golden/serialization_test_4/mp_23.golden new file mode 100644 index 0000000..006bccb --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_23.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 85.9941 W + Total Leakage = 10.5679 W + Peak Dynamic = 75.4261 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.3236 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.00414 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 53.1948 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 5.31948 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.00414 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.58777 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.178803 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.204451 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.731264 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.565654 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.055543 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00951137 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.211686 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.44673 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.294123 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.300344 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.586176 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.216 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.370176 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.483185 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.691442 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second 
Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 13.2987 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 5.31948 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 6.32849 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 2.5314 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_24.golden b/unit_test/golden/serialization_test_4/mp_24.golden new file mode 100644 index 0000000..2b4abf9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_24.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.9025 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.02402 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.02402 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.32926 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.86397 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.84873 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00249802 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00637212 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.132159 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.151116 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.895888 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.368218 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0363461 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00626393 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.329498 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0.117483 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.20236 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.216735 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.222956 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.810579 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.161792 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0754407 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.573346 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.29529 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.434921 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.87798 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W 
+ Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_25.golden b/unit_test/golden/serialization_test_4/mp_25.golden new file mode 100644 index 0000000..5d90d2a --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_25.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 7.30632 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.36518 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.36518 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.2773 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.271798 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.773504 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 0.771009 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000355463 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000634566 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0932887 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.10667 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.291747 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.144213 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 
W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.015741 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0030165 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.18658 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.52906 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.119441 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.107745 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.653364 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.101985 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.117004 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.434375 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.182553 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.49129 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.814612 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0.000381902 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0626822 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0348957 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0276931 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 9.33981e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_26.golden b/unit_test/golden/serialization_test_4/mp_26.golden new file mode 100644 index 0000000..88e007c --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_26.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 3.6192 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.697996 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0427379 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 0.697996 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 0.245226 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic 
= 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0.237652 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000651995 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 0.236104 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000243472 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.000651995 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00353366 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 
0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00404054 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0006786 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000108893 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 6.98905e-05 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000213372 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 7.95393e-05 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 4.94578e-05 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000157447 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0 W + + Data Cache: + Area = 1.50084 mm^2 
+ Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 0.452091 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00282766 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 
0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00282766 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.033198 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0147592 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.0184388 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + 
Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + 
Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0427379 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0427379 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0237925 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0188816 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 6.36805e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_27.golden b/unit_test/golden/serialization_test_4/mp_27.golden new file mode 100644 index 0000000..0df13e5 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_27.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 12.373 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.46324 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.46324 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.93982 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage 
= 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.303188 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.346678 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.11503 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.58726 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0697863 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0143825 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.287719 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.88035 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.389924 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.378229 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.959654 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.296031 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0717656 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.591858 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.614712 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.7879 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_28.golden b/unit_test/golden/serialization_test_4/mp_28.golden new file mode 100644 index 0000000..41070d1 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_28.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.00228 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.12381 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.12381 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.53671 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 
0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 
0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.539148 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.280952 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0360868 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00788765 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.139788 W + + Data Cache: + Area = 1.50084 mm^2 + 
Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.77465 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.201556 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.138103 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power 
gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0758947 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.708797 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0853309 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0725167 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.550949 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0886051 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.156637 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 
0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 
0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_29.golden b/unit_test/golden/serialization_test_4/mp_29.golden new file mode 100644 index 0000000..6111a9c --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_29.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.1778 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.98538 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.02284 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557122 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0561823 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.416028 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.63016 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.370017 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0724111 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.912996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.28548 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.135301 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.492215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.66218 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_30.golden b/unit_test/golden/serialization_test_4/mp_30.golden new file mode 100644 index 0000000..dd81964 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_30.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.5103 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.63188 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.63188 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.63214 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 
0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 
0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.834522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.568574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0655213 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.223635 W + + Data Cache: + Area = 1.50084 mm^2 + Peak 
Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.86147 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.262272 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.259535 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage 
with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.05875 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.188033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0654576 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.805256 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.31697 W + + L2 + Area = 0.0189192 mm^2 + Peak 
Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power 
gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_31.golden b/unit_test/golden/serialization_test_4/mp_31.golden new file mode 100644 index 0000000..8bcf3d2 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_31.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.48488 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.647 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.479515 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.227164 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0310528 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.192392 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12266 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.167715 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.116703 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.063453 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.789333 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.129918 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0698433 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.589572 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345212 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0.0313411 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate 
Leakage = 1.19331e-05 W + Runtime Dynamic = 0.0174479 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0.0138465 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 4.6699e-05 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_32.golden b/unit_test/golden/serialization_test_4/mp_32.golden new file mode 100644 index 0000000..1e0ecf9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_32.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.1465 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 
0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.98538 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 
0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 
0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.02284 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557122 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0561823 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.416028 W + + Data Cache: + Area = 1.50084 mm^2 + Peak 
Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.63016 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.370017 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage 
with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0724111 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.912996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.28548 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.135301 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.492215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.66218 W + + L2 + Area = 0.0189192 mm^2 + Peak 
Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power 
gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_33.golden b/unit_test/golden/serialization_test_4/mp_33.golden new file mode 100644 index 0000000..72df69b --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_33.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.5207 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.64245 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.834522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.568574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold 
Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0655213 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.223635 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.86147 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.262272 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.259535 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.05875 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.188033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0654576 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.805256 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.31697 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_34.golden b/unit_test/golden/serialization_test_4/mp_34.golden new file mode 100644 index 0000000..aff580e --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_34.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.44323 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.56477 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.56477 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.63669 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.479515 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.227164 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0310528 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.192392 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W 
+ Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12266 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.167715 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.116703 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.063453 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.789333 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.129918 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0698433 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.589572 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345212 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 
W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_35.golden b/unit_test/golden/serialization_test_4/mp_35.golden new file mode 100644 index 0000000..3c6cf95 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_35.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.1568 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.27833 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.27833 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.99569 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.02284 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557122 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0561823 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.416028 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.63016 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.370017 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0724111 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.912996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.28548 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.135301 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.492215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.66218 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_36.golden b/unit_test/golden/serialization_test_4/mp_36.golden new file mode 100644 index 0000000..1e7d1fa --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_36.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.531 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.6525 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with 
power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.6525 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.65276 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 
0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + 
Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.834522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.568574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0655213 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.223635 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.86147 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.262272 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.259535 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate 
Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.05875 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.188033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0654576 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.805256 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.31697 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold 
Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 
0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_37.golden b/unit_test/golden/serialization_test_4/mp_37.golden new file mode 100644 index 0000000..5c88366 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_37.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.45354 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.647 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.479515 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.227164 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0310528 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.192392 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12266 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.167715 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.116703 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.063453 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.789333 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.129918 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0698433 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 
0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.589572 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345212 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 
1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_38.golden b/unit_test/golden/serialization_test_4/mp_38.golden new file mode 100644 index 0000000..1e0ecf9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_38.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.1465 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage 
with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.98538 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + 
Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + 
Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.02284 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557122 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0561823 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.416028 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 
0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.63016 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.370017 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage 
= 4.19966e-05 W + Runtime Dynamic = 0.0724111 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.912996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.28548 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.135301 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.492215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.66218 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 
0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 
0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_39.golden b/unit_test/golden/serialization_test_4/mp_39.golden new file mode 100644 index 0000000..72df69b --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_39.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.5207 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.64245 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.834522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.568574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold 
Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0655213 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.223635 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.86147 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.262272 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.259535 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.05875 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.188033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0654576 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.805256 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.31697 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_40.golden b/unit_test/golden/serialization_test_4/mp_40.golden new file mode 100644 index 0000000..8945f55 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_40.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.39085 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.51239 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.51239 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.58431 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0272911 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage 
with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0125135 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.479515 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.227164 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0310528 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.192392 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W 
+ Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12266 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.167715 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.116703 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.063453 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.789333 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.129918 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0698433 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.589572 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345212 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 
W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_41.golden b/unit_test/golden/serialization_test_4/mp_41.golden new file mode 100644 index 0000000..6ba89a8 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_41.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 14.702 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.8236 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 11.8236 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 3.65703 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0208559 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.61417 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + 
Runtime Dynamic = 2.59715 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00267819 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00717194 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.412025 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.471127 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.70542 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.967241 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 
0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.146685 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0354909 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.138849 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.347122 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold 
Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 5.97514 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.758449 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.76467 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.37994 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.525663 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 
W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.854278 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 1.14082 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.79146 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak 
Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold 
Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_42.golden b/unit_test/golden/serialization_test_4/mp_42.golden new file mode 100644 index 0000000..bd5ea8f --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_42.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.72182 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.84336 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance 
device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.84336 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.61111 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 
1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold 
Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.64998 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.116995 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0444099 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0121154 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00220464 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00369946 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.213284 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 
0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.293718 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.07527 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.122178 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.128399 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.00622085 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold 
Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.963809 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.132976 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.830833 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.107395 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.0628584 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_43.golden b/unit_test/golden/serialization_test_4/mp_43.golden new file mode 100644 index 0000000..0b6eb19 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_43.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 16.4309 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 13.5525 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 13.5525 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.25877 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0309308 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 0 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + 
Runtime Dynamic = 0 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.49754 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.568908 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.65893 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.84442 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.265489 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold 
Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0845081 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0147885 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.0028469 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.0196543 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.792933 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.317896 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.122743 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.245487 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.267017 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with 
power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 9.57482 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.885603 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.697982 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.200064 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.52674 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.384999 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.278646 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + 
Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.863096 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.802607 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 1.56229 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 4.23072 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 
0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage 
with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_44.golden b/unit_test/golden/serialization_test_4/mp_44.golden new file mode 100644 index 0000000..2dac8af --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_44.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 7.0968 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.21834 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device 
type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 4.21834 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.45436 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 
1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.00777406 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.00888918 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold 
Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.0869382 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.000239566 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.000469419 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.000174986 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0801027 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0587414 W + + 
LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.0534033 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.54353 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.0517577 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0186626 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0455368 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold 
Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.987431 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0736789 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.136531 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.777221 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0698156 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.209447 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + 
+***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_45.golden b/unit_test/golden/serialization_test_4/mp_45.golden new file mode 100644 index 0000000..e6bcd96 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_45.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 9.1257 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.24724 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 6.24724 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.76451 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0103103 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29634 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0.000869963 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.155481 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.177784 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.773661 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.485622 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0502095 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00910544 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.197261 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.101934 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.106807 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + 
Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.405 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.22644 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.214744 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.846481 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.156193 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.123646 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 
W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.566642 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.314079 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.15982 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 
W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_46.golden b/unit_test/golden/serialization_test_4/mp_46.golden new file mode 100644 index 0000000..4602015 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_46.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 12.388 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.50949 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First 
Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 9.50949 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.93982 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.303188 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.346678 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W 
+ Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.11503 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.58726 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0697863 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0143825 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.330912 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0.129577 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.240315 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.88342 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.396145 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.38445 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.024137 W + + Instruction Scheduler: + Area = 0.325195 mm^2 
+ Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.956498 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.291057 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0735829 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.591858 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.614712 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.7879 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_47.golden b/unit_test/golden/serialization_test_4/mp_47.golden new file mode 100644 index 0000000..3614807 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_47.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 7.97529 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.09682 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.09682 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.54702 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0233222 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0266675 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.539148 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.280952 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + 
Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0360868 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00788765 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.0965958 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0431924 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 1.78055 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.204294 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.131882 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0848528 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.711953 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.0903048 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0706994 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + 
Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.550949 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.0886051 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.156637 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + 
Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 
W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_48.golden b/unit_test/golden/serialization_test_4/mp_48.golden new file mode 100644 index 0000000..1e0ecf9 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_48.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 11.1465 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First 
Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 8.26802 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 1.98538 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0545822 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold 
Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 1.29547 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 1.29044 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.000891102 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00206895 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.256544 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.293343 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 1.02284 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.557122 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.0531804 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0561823 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0099173 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000606643 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00404585 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.416028 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + 
Runtime Dynamic = 0.17277 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0526043 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.105209 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.213613 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 4.63016 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.429987 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.370017 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0724111 W + + Instruction Scheduler: + Area = 0.325195 mm^2 
+ Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.912996 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.28548 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.135301 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.492215 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.539554 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.604027 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.66218 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + 
Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_49.golden b/unit_test/golden/serialization_test_4/mp_49.golden new file mode 100644 index 0000000..72df69b --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_49.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... 
+ + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 10.5207 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power 
gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 7.64219 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.64245 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.09134 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 
0.00127034 W + Runtime Dynamic = 2.07772 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00214255 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00573755 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.194351 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.22223 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.834522 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.568574 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.053382 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold 
Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0655213 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.0131647 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000855561 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.00485643 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.223635 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W + Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.070139 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.080105 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage 
= 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 3.86147 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.262272 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.259535 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.0151789 W + + Instruction Scheduler: + Area = 0.325195 mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 1.05875 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.188033 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0654576 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold 
Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.805256 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.408027 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.547658 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 1.31697 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level 
Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + +***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + 
Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/golden/serialization_test_4/mp_50.golden b/unit_test/golden/serialization_test_4/mp_50.golden new file mode 100644 index 0000000..5c88366 --- /dev/null +++ b/unit_test/golden/serialization_test_4/mp_50.golden @@ -0,0 +1,488 @@ +McPAT (version 1.3 of Feb, 2015) is computing the target processor... + + +McPAT (version 1.3 of Feb, 2015) results (current print level is 5) +***************************************************************************************** + Technology 22 nm + Using Long Channel Devices When Appropriate + Interconnect metal projection= aggressive interconnect technology projection + Core clock Rate(MHz) 4000 + +***************************************************************************************** +Processor: + Area = 30.5122 mm^2 + Peak Power = 61.5839 W + Total Leakage = 10.5679 W + Peak Dynamic = 51.0159 W + Subthreshold Leakage = 10.5538 W + Subthreshold Leakage with power gating = 3.59841 W + Gate Leakage = 0.0141367 W + Runtime Dynamic = 8.45354 W + + Total Cores: 1 cores + Device Type= ITRS high performance device type + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Total L3s: + Device Type= ITRS high performance device type + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + + Total First Level 
Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total Second Level Directory: + Device Type= ITRS high performance device type + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + + Total NoCs (Network/Bus): + Device Type= ITRS high performance device type + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Total MCs: 4 Memory Controllers + Device Type= ITRS high performance device type + Area = 15.8481 mm^2 + Peak Dynamic = 28.7846 W + Subthreshold Leakage = 3.12407 W + Subthreshold Leakage with power gating = 0.759556 W + Gate Leakage = 0.00286315 W + Runtime Dynamic = 2.87846 W + +***************************************************************************************** +Core: + Area = 14.5209 mm^2 + Peak Dynamic = 21.4709 W + Subthreshold Leakage = 7.37459 W + Subthreshold Leakage with power gating = 2.81964 W + Gate Leakage = 0.0111126 W + Runtime Dynamic = 5.57508 W + + Instruction Fetch Unit: + Area = 4.21345 mm^2 + Peak Dynamic = 6.46687 W + Subthreshold Leakage = 1.82119 W + Subthreshold Leakage with power gating = 0.794057 W + Gate Leakage = 0.00237235 W + Runtime Dynamic = 2.647 W + + Instruction Cache: + Area = 0.401338 mm^2 + Peak Dynamic = 0.525877 W + Subthreshold Leakage = 0.263687 W + Subthreshold Leakage with power gating = 0.100271 W + Gate Leakage = 0.000357244 W + Runtime Dynamic = 0.0818734 W + + Branch Target Buffer: + Area = 0.0520758 mm^2 + Peak Dynamic = 0.0417118 W + Subthreshold Leakage = 0.0146552 W + Subthreshold Leakage with 
power gating = 0.00712747 W + Gate Leakage = 1.31642e-05 W + Runtime Dynamic = 0.0206206 W + + Branch Predictor: + Area = 2.84935 mm^2 + Peak Dynamic = 4.45942 W + Subthreshold Leakage = 1.09633 W + Subthreshold Leakage with power gating = 0.525614 W + Gate Leakage = 0.00129566 W + Runtime Dynamic = 2.34114 W + + Global Predictor: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + Local Predictor: + L1_Local Predictor: + Area = 2.80133 mm^2 + Peak Dynamic = 4.42329 W + Subthreshold Leakage = 1.06533 W + Subthreshold Leakage with power gating = 0.509976 W + Gate Leakage = 0.00127034 W + Runtime Dynamic = 2.3293 W + + L2_Local Predictor: + Area = 0.00577096 mm^2 + Peak Dynamic = 0.00500092 W + Subthreshold Leakage = 0.00373552 W + Subthreshold Leakage with power gating = 0.0018393 W + Gate Leakage = 3.42177e-06 W + Runtime Dynamic = 0.00196238 W + + Chooser: + Area = 0.0203437 mm^2 + Peak Dynamic = 0.0137093 W + Subthreshold Leakage = 0.0134509 W + Subthreshold Leakage with power gating = 0.00682141 W + Gate Leakage = 1.07366e-05 W + Runtime Dynamic = 0.00493773 W + + RAS: + Area = 0.00156062 mm^2 + Peak Dynamic = 0.00370871 W + Subthreshold Leakage = 0.000356248 W + Subthreshold Leakage with power gating = 0.000155699 W + Gate Leakage = 4.21077e-07 W + Runtime Dynamic = 0 W + + Instruction Buffer: + Area = 0.00849419 mm^2 + Peak Dynamic = 0.621925 W + Subthreshold Leakage = 0.00352506 W + Subthreshold Leakage with power gating = 0.001566 W + Gate Leakage = 3.45542e-06 W + Runtime Dynamic = 0.0699665 W + + Instruction Decoder: + Area = 0.854606 mm^2 + Peak Dynamic = 0.711135 W + Subthreshold Leakage = 0.361292 W + Subthreshold Leakage with power gating = 0.130065 W + Gate Leakage = 0.000402065 W + Runtime Dynamic = 0.0800026 W + + Renaming Unit: + Area = 0.166931 mm^2 + Peak Dynamic = 2.77931 W + 
Subthreshold Leakage = 0.088364 W + Subthreshold Leakage with power gating = 0.0328301 W + Gate Leakage = 0.000308125 W + Runtime Dynamic = 0.479515 W + + Int Front End RAT with 0 internal checkpoints: + Area = 0.0211086 mm^2 + Peak Dynamic = 1.44537 W + Subthreshold Leakage = 0.00103172 W + Subthreshold Leakage with power gating = 0.000494367 W + Gate Leakage = 1.29599e-06 W + Runtime Dynamic = 0.227164 W + + FP Front End RAT with 0 internal checkpoints: + Area = 0.00626328 mm^2 + Peak Dynamic = 0.711248 W + Subthreshold Leakage = 0.000560468 W + Subthreshold Leakage with power gating = 0.000267931 W + Gate Leakage = 7.28843e-07 W + Runtime Dynamic = 0.000153759 W + + Free List: + Area = 0.0548701 mm^2 + Peak Dynamic = 0.0865958 W + Subthreshold Leakage = 0.0024558 W + Subthreshold Leakage with power gating = 0.00131282 W + Gate Leakage = 2.4131e-06 W + Runtime Dynamic = 0.0310528 W + + Int Retire RAT: + Area = 0.00897726 mm^2 + Peak Dynamic = 0.0342243 W + Subthreshold Leakage = 0.000617253 W + Subthreshold Leakage with power gating = 0.000299241 W + Gate Leakage = 7.71026e-07 W + Runtime Dynamic = 0.00707579 W + + FP Retire RAT: + Area = 0.00258785 mm^2 + Peak Dynamic = 0.0110448 W + Subthreshold Leakage = 0.000292791 W + Subthreshold Leakage with power gating = 0.000147058 W + Gate Leakage = 4.13248e-07 W + Runtime Dynamic = 0.000108807 W + + FP Free List: + Area = 0.0255437 mm^2 + Peak Dynamic = 0.0635978 W + Subthreshold Leakage = 0.00170212 W + Subthreshold Leakage with power gating = 0.000895282 W + Gate Leakage = 1.73308e-06 W + Runtime Dynamic = 0.000346383 W + + Load Store Unit: + Area = 1.85212 mm^2 + Peak Dynamic = 2.25124 W + Subthreshold Leakage = 1.07304 W + Subthreshold Leakage with power gating = 0.400749 W + Gate Leakage = 0.0016757 W + Runtime Dynamic = 0.192392 W + + Data Cache: + Area = 1.50084 mm^2 + Peak Dynamic = 1.96909 W + Subthreshold Leakage = 0.930244 W + Subthreshold Leakage with power gating = 0.349345 W + Gate Leakage = 0.00128629 W 
+ Runtime Dynamic = 0.0863849 W + + LoadQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0175348 W + + StoreQ: + Area = 0.0698251 mm^2 + Peak Dynamic = 0.0876738 W + Subthreshold Leakage = 0.0305434 W + Subthreshold Leakage with power gating = 0.0109956 W + Gate Leakage = 4.43227e-05 W + Runtime Dynamic = 0.0350695 W + + Memory Management Unit: + Area = 0.0981738 mm^2 + Peak Dynamic = 0.294179 W + Subthreshold Leakage = 0.105114 W + Subthreshold Leakage with power gating = 0.0378411 W + Runtime Dynamic = 0.133508 W + + Itlb: + Area = 0.012108 mm^2 + Peak Dynamic = 0.00653924 W + Subthreshold Leakage = 0.00655894 W + Subthreshold Leakage with power gating = 0.00236122 W + Gate Leakage = 8.94812e-06 W + Runtime Dynamic = 0 W + + Dtlb: + Area = 0.0384856 mm^2 + Peak Dynamic = 0.0206236 W + Subthreshold Leakage = 0.0168515 W + Subthreshold Leakage with power gating = 0.00606654 W + Gate Leakage = 2.36467e-05 W + Runtime Dynamic = 0 W + + Execution Unit: + Area = 5.26739 mm^2 + Peak Dynamic = 9.5616 W + Subthreshold Leakage = 2.20841 W + Subthreshold Leakage with power gating = 0.805832 W + Runtime Dynamic = 2.12266 W + + Register Files: + Area = 1.07346 mm^2 + Peak Dynamic = 3.06047 W + Subthreshold Leakage = 0.0715723 W + Subthreshold Leakage with power gating = 0.0339382 W + Gate Leakage = 8.39932e-05 W + Runtime Dynamic = 0.167715 W + + Integer RF: + Area = 0.536731 mm^2 + Peak Dynamic = 2.45899 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.116703 W + + Floating Point RF: + Area = 0.536731 mm^2 + Peak Dynamic = 0.601483 W + Subthreshold Leakage = 0.0357862 W + Subthreshold Leakage with power gating = 0.0169691 W + Gate Leakage = 4.19966e-05 W + Runtime Dynamic = 0.063453 W + + Instruction Scheduler: + Area = 0.325195 
mm^2 + Peak Dynamic = 1.8886 W + Subthreshold Leakage = 0.0507177 W + Subthreshold Leakage with power gating = 0.0208898 W + Gate Leakage = 6.6877e-05 W + Runtime Dynamic = 0.789333 W + + Instruction Window: + Area = 0.0576312 mm^2 + Peak Dynamic = 0.486039 W + Subthreshold Leakage = 0.0143608 W + Subthreshold Leakage with power gating = 0.0051699 W + Gate Leakage = 2.14976e-05 W + Runtime Dynamic = 0.129918 W + + FP Instruction Window: + Area = 0.0534741 mm^2 + Peak Dynamic = 0.372611 W + Subthreshold Leakage = 0.0188187 W + Subthreshold Leakage with power gating = 0.00677472 W + Gate Leakage = 2.77821e-05 W + Runtime Dynamic = 0.0698433 W + + ROB: + Area = 0.21409 mm^2 + Peak Dynamic = 1.02995 W + Subthreshold Leakage = 0.0175382 W + Subthreshold Leakage with power gating = 0.00894517 W + Gate Leakage = 1.75973e-05 W + Runtime Dynamic = 0.589572 W + + Integer ALUs (Count: 6 ): + Area = 0.230726 mm^2 + Peak Dynamic = 1.19719 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.201342 W + + Floating Point Units (FPUs) (Count: 6 ): + Area = 3.34032 mm^2 + Peak Dynamic = 1.07747 W + Subthreshold Leakage = 1.18618 W + Subthreshold Leakage with power gating = 0.427026 W + Gate Leakage = 0.00173513 W + Runtime Dynamic = 0.265815 W + + Complex ALUs (Mul/Div) (Count: 2 ): + Area = 0.230726 mm^2 + Peak Dynamic = 0.267364 W + Subthreshold Leakage = 0.327734 W + Subthreshold Leakage with power gating = 0.117984 W + Gate Leakage = 0.000479405 W + Runtime Dynamic = 0.139631 W + + Results Broadcast Bus: + Area Overhead = 0.0193859 mm^2 + Peak Dynamic = 1.64329 W + Subthreshold Leakage = 0.162765 W + Subthreshold Leakage with power gating = 0.0585953 W + Gate Leakage = 0.00023809 W + Runtime Dynamic = 0.345212 W + + L2 + Area = 0.0189192 mm^2 + Peak Dynamic = 0.117718 W + Subthreshold Leakage = 0.0160624 W + Subthreshold Leakage with power gating = 0.00586218 W + Gate Leakage = 2.2535e-05 
W + Runtime Dynamic = 0 W + +***************************************************************************************** + L3 + Area = 0.019246 mm^2 + Peak Dynamic = 0.120165 W + Subthreshold Leakage = 0.00816913 W + Subthreshold Leakage with power gating = 0.00298074 W + Gate Leakage = 2.29937e-05 W + Runtime Dynamic = 0 W + +***************************************************************************************** + First Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** + Second Level Directory + Area = 0.00754768 mm^2 + Peak Dynamic = 0.0067045 W + Subthreshold Leakage = 0.00194595 W + Subthreshold Leakage with power gating = 0.000875678 W + Gate Leakage = 8.1962e-06 W + Runtime Dynamic = 0 W + +***************************************************************************************** +Memory Controller: + Area = 3.96201 mm^2 + Peak Dynamic = 7.19616 W + Subthreshold Leakage = 0.781017 W + Subthreshold Leakage with power gating = 0.189889 W + Gate Leakage = 0.000715787 W + Runtime Dynamic = 2.87846 W + + Front End Engine: + Area = 0.230165 mm^2 + Peak Dynamic = 1.53245 W + Subthreshold Leakage = 0.0878421 W + Subthreshold Leakage with power gating = 0.0235271 W + Gate Leakage = 7.60771e-05 W + Runtime Dynamic = 0.612979 W + + Transaction Engine: + Area = 1.32845 mm^2 + Peak Dynamic = 5.43776 W + Subthreshold Leakage = 0.246754 W + Subthreshold Leakage with power gating = 0.0592209 W + Gate Leakage = 0.000227721 W + Runtime Dynamic = 2.1751 W + + PHY: + Area = 2.4034 mm^2 + Peak Dynamic = 0.225947 W + Subthreshold Leakage = 0.446421 W + Subthreshold Leakage with power gating = 0.107141 W + Gate Leakage = 0.000411988 W + Runtime Dynamic = 0.0903787 W + 
+***************************************************************************************** +NOC + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Router: + Area = 0.108914 mm^2 + Peak Dynamic = 0.626822 W + Subthreshold Leakage = 0.0430798 W + Subthreshold Leakage with power gating = 0.0144768 W + Gate Leakage = 0.000121556 W + Runtime Dynamic = 0 W + + Virtual Channel Buffer: + Area = 0.0651005 mm^2 + Peak Dynamic = 0.348957 W + Subthreshold Leakage = 0.00573303 W + Subthreshold Leakage with power gating = 0.00103195 W + Gate Leakage = 1.19331e-05 W + Runtime Dynamic = 0 W + + Crossbar: + Area = 0.0102137 mm^2 + Peak Dynamic = 0.276931 W + Subthreshold Leakage = 0.0373461 W + Subthreshold Leakage with power gating = 0.0134446 W + Gate Leakage = 0.000109621 W + Runtime Dynamic = 0 W + + Arbiter: + Peak Dynamic = 0.000933981 W + Subthreshold Leakage = 6.23276e-07 W + Subthreshold Leakage with power gating = 2.24379e-07 W + Gate Leakage = 1.37911e-09 W + Runtime Dynamic = 0 W + +***************************************************************************************** diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index f7d4aa9..d37ce80 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -31,19 +31,21 @@ import difflib import glob from threading import Timer +import threading, queue start = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") verbose = True debug = True quiet = False timeout_limit = 120.0 -kill_flag = False +kill_flag = [] parser = argparse.ArgumentParser() parser.add_argument('--input', type=str, default="./input/basic_test_1", help="Test Input Path") parser.add_argument("--output", type=str, default="./output/basic_test_1", help="Test Output Path") parser.add_argument("--golden", type=str, default="./golden/basic_test_1", help="Test Golden Path") 
parser.add_argument("--serial", type=bool, default=False, help="Serial if true, Basic if false") +parser.add_argument("--nthreads", type=int, default=1, help="Number of Threads to unit test with") args = parser.parse_args() input_path = args.input @@ -81,9 +83,9 @@ def print_results(passed, failed, total): "\033[00m; Total Vectors: " + str(total)) -def kill(p): +def kill(p, i): global kill_flag - kill_flag = True + kill_flag[i] = True try: p.kill() except OSError: @@ -105,9 +107,9 @@ def diff_result(vector): return 1 -def run_test_normal(vector): +def run_test_normal(vector, i): global kill_flag - kill_flag = False + kill_flag[i] = False infile = os.path.join(input_path, vector + ".xml") stdo = os.path.join(output_path, vector + ".out") stde = os.path.join(output_path, vector + ".err") @@ -121,11 +123,11 @@ def run_test_normal(vector): ], stdout=so, stderr=se) - t = Timer(timeout_limit, kill, [p]) + t = Timer(timeout_limit, kill, [p, i]) t.start() p.wait() t.cancel() - if kill_flag: + if kill_flag[i]: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 else: @@ -140,9 +142,9 @@ def run_test_normal(vector): return 0 -def run_test_serializaiton_create(vector): +def run_test_serializaiton_create(vector, i): global kill_flag - kill_flag = False + kill_flag[i] = False infile = os.path.join(input_path, vector + ".xml") sname = os.path.join(output_path, vector + ".txt") stdo = os.path.join(output_path, vector + ".out") @@ -154,11 +156,11 @@ def run_test_serializaiton_create(vector): ], stdout=so, stderr=se) - t = Timer(timeout_limit, kill, [p]) + t = Timer(timeout_limit, kill, [p,i]) t.start() p.wait() t.cancel() - if kill_flag: + if kill_flag[i]: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 if (os.stat(os.path.join(output_path, vector + ".txt")).st_size > 0): @@ -169,9 +171,9 @@ def run_test_serializaiton_create(vector): return 0 -def run_test_serialization_restore(vector, sfile): +def 
run_test_serialization_restore(vector, sfile, i): global kill_flag - kill_flag = False + kill_flag[i] = False infile = os.path.join(input_path, vector + ".xml") sname = os.path.join(output_path, sfile + ".txt") stdo = os.path.join(output_path, vector + ".out") @@ -183,25 +185,57 @@ def run_test_serialization_restore(vector, sfile): ], stdout=so, stderr=se) - t = Timer(timeout_limit, kill, [p]) + t = Timer(timeout_limit, kill, [p, i]) t.start() p.wait() t.cancel() - if kill_flag: + if kill_flag[i]: print_fail(vector, "Timeout Limit of " + str(timeout_limit) + "s Reached") return 1 - #else: - # if diff_result(vector) == 0: - # print_pass(vector) - # return 0 - # else: - # print_fail( - # vector, - # "The files " + vector + ".out and " + vector + ".golden differ") - # return 1 - print_pass(vector) + else: + if diff_result(vector) == 0: + print_pass(vector) + return 0 + else: + print_fail( + vector, + "The files " + vector + ".out and " + vector + ".golden differ") + return 1 return 0 +results = [] +iteration = 0 + +def worker_thread_normal(iq, tid): + global results + global iteration + """ Worker Thread Normal expects just the test vector name and test numer in + the queue [name, number] """ + while not iq.empty(): + test = iq.get(); + name = test[0] + number = test[1] + if run_test_normal(name, tid) == 0: + results[number] = True + else: + results[number] = False + +def worker_thread_serial(iq, tid): + global results + global iteration + """ Worker Thread Serial expects test vector name, test number and the serial + file name in the queue [name, number, sfile] """ + global results + while not iq.empty(): + test = iq.get(); + name = test[0] + number = test[1] + sfile = test[2] + if run_test_serialization_restore(name, sfile, tid) == 0: + results[number] = True + else: + results[number] = False + def get_vectors(): files = glob.glob(os.path.join(input_path, "*")) @@ -214,26 +248,46 @@ def get_vectors(): f = 0 print_info(start) vectors = get_vectors() + results = 
[False]*len(vectors) print_info("Found " + str(len(vectors)) + " test vectors") + + InputQueue = queue.Queue() + threads = [] + kill_flag = [False]*args.nthreads + if not args.serial: - for vector in vectors: - if run_test_normal(vector) == 0: - p += 1 - else: - f += 1 + # Prepare queue with inputs: + for vector,i in zip(vectors, range(len(vectors))): + InputQueue.put([vector, i]) + # Create Threads: + for i in range(args.nthreads): + thr = threading.Thread(target=worker_thread_normal, args=[InputQueue, i]) + thr.start() + threads.append(thr) + # Join Threads: + for thr in threads: + thr.join() else: # Create a Serialized File: if(len(vectors) > 0): - if run_test_serializaiton_create(vectors[0]) == 0: - # Use Serialized File for Remainder of Tests: - for vector in vectors: - if run_test_serialization_restore(vector, vectors[0]) == 0: - p += 1 - else: - f += 1 - else: - print_info("Failed to create serialization checkpoint") + if run_test_serializaiton_create(vectors[0], 0) == 0: + # Prepare queue with inputs: + for vector,i in zip(vectors, range(len(vectors))): + InputQueue.put([vector, i, vectors[0]]) + # Create Threads: + for i in range(args.nthreads): + thr = threading.Thread(target=worker_thread_serial, args=[InputQueue, i]) + thr.start() + threads.append(thr) + # Join Threads: + for thr in threads: + thr.join() else: print_info("No files in "+input_path) sys.exit(1) + for i in results: + if i: + p += 1 + else: + f += 1 print_results(p, f, len(vectors)) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 71c5bb9..1089a01 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -28,7 +28,7 @@ print_info () { print_info "#########################################################" print_info "# #" -print_info "# McPAT Unit Test v2.0.0 #" +print_info "# McPAT Unit Test v2.0.1 #" print_info "# #" print_info "#########################################################" @@ -38,6 +38,8 @@ TESTS=("basic_test_1" "serialization_test_3" 
"serialization_test_4") +NTHREADS="32" + #-------------------------------------------------------------------- # Output Directories # ___ _ _ _____ ____ _ _ _____ ____ ___ ____ @@ -59,6 +61,8 @@ for test_set in ${TESTS[@]}; do mkdir -p $OUTPUT/$test_set done +print_info "Launching Tests; NTHREADS=$NTHREADS" + #-------------------------------------------------------------------- # Run Tests # _____ _____ ____ _____ ____ @@ -71,39 +75,44 @@ done print_info "#########################################################" print_info "# Unit Test Basic 1 #" print_info "#########################################################" -./unit_test.py \ - --input=./input/basic_test_1 \ - --output=./output/basic_test_1 \ - --golden=./golden/basic_test_1 +#./unit_test.py \ +# --input=./input/basic_test_1 \ +# --output=./output/basic_test_1 \ +# --golden=./golden/basic_test_1 \ +# --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 1 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_1 \ -# --output=./output/serialization_test_1 \ -# --golden=./golden/serialization_test_1 \ -# --serial=True +./unit_test.py \ + --input=./input/serialization_test_1 \ + --output=./output/serialization_test_1 \ + --golden=./golden/serialization_test_1 \ + --serial=True \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 2 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_2 \ -# --output=./output/serialization_test_2 \ -# --golden=./golden/serialization_test_2 \ -# --serial=True +./unit_test.py \ + --input=./input/serialization_test_2 \ + --output=./output/serialization_test_2 \ + --golden=./golden/serialization_test_2 \ + --serial=True \ + --nthreads=$NTHREADS print_info 
"#########################################################" print_info "# Unit Test Serialization 3 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_3 \ -# --output=./output/serialization_test_3 \ -# --golden=./golden/serialization_test_3 \ -# --serial=True +./unit_test.py \ + --input=./input/serialization_test_3 \ + --output=./output/serialization_test_3 \ + --golden=./golden/serialization_test_3 \ + --serial=True \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 4 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_4 \ -# --output=./output/serialization_test_4 \ -# --golden=./golden/serialization_test_4 \ -# --serial=True +./unit_test.py \ + --input=./input/serialization_test_4 \ + --output=./output/serialization_test_4 \ + --golden=./golden/serialization_test_4 \ + --serial=True \ + --nthreads=$NTHREADS From 44d9521f76876fc3c05f44c393eec579ea792388 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 23 Jun 2020 17:10:29 -0500 Subject: [PATCH 47/59] instruction fetch unit finally DONE --- src/core/core.cc | 10 ++-- src/core/instfetch.cc | 103 ++++++++++++++++++++++++++++-------------- src/core/instfetch.h | 11 ++++- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index 1d042b5..cfd5e71 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -71,7 +71,11 @@ Core::Core(const ParseXML *XML_interface, clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); + ifu = new InstFetchU(); + ifu->set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); + ifu->computeArea(); + ifu->set_stats(XML); + lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); mmu = new 
MemManU(); mmu->set_params(XML, ithCore, &interface_ip, coredynp); @@ -155,7 +159,7 @@ void Core::computeEnergy(bool is_tdp) { double rtp_pipeline_coe; double num_units = 4.0; if (is_tdp) { - ifu->computeEnergy(is_tdp); + ifu->computeDynamicPower(is_tdp); lsu->computeEnergy(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); @@ -245,7 +249,7 @@ void Core::computeEnergy(bool is_tdp) { } } else { - ifu->computeEnergy(is_tdp); + ifu->computeDynamicPower(is_tdp); lsu->computeEnergy(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index f482227..a145872 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -42,14 +42,16 @@ #include #include -InstFetchU::InstFetchU(const ParseXML *XML_interface, +void InstFetchU::set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), - exist(exist_) { + bool exist_){ + XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; + coredynp=dyn_p_; + exist=exist_; + + if (!exist) return; int idx, tag, data, size, line, assoc, banks; @@ -105,13 +107,10 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - icache.caches.computeArea(); + scktRatio = g_tp.sckt_co_eff; chip_PR_overhead = g_tp.chip_layout_overhead; macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area() + - icache.caches.local_result.area); - area.set_area(area.get_area() + icache.caches.local_result.area); // output_data_csv(icache.caches.local_result); /* @@ -162,9 +161,6 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - icache.missb.computeArea(); - icache.area.set_area(icache.area.get_area() + icache.missb.local_result.area); - 
area.set_area(area.get_area() + icache.missb.local_result.area); // output_data_csv(icache.missb.local_result); // fill buffer @@ -200,9 +196,6 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - icache.ifb.computeArea(); - icache.area.set_area(icache.area.get_area() + icache.ifb.local_result.area); - area.set_area(area.get_area() + icache.ifb.local_result.area); // output_data_csv(icache.ifb.local_result); // prefetch buffer @@ -242,10 +235,6 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - icache.prefetchb.computeArea(); - icache.area.set_area(icache.area.get_area() + - icache.prefetchb.local_result.area); - area.set_area(area.get_area() + icache.prefetchb.local_result.area); // output_data_csv(icache.prefetchb.local_result); // Instruction buffer @@ -292,9 +281,6 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - IB.computeArea(); - IB.area.set_area(IB.area.get_area() + IB.local_result.area); - area.set_area(area.get_area() + IB.local_result.area); // output_data_csv(IB.IB.local_result); // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; @@ -360,15 +346,9 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - BTB.computeArea(); - BTB.area.set_area(BTB.area.get_area() + BTB.local_result.area); - area.set_area(area.get_area() + BTB.local_result.area); /// cout<<"area="< 0) { + BPT.set_stats(XML); + } + init_stats = true; +} + +void InstFetchU::computeArea(){ + if (!init_params) { + std::cerr << "[ InstFetchU ] Error: must set params before calling " + "computeArea()\n"; + + exit(1); + } + icache.caches.computeArea(); + icache.area.set_area(icache.area.get_area() + + icache.caches.local_result.area); + area.set_area(area.get_area() + icache.caches.local_result.area); + + icache.missb.computeArea(); + 
icache.area.set_area(icache.area.get_area() + icache.missb.local_result.area); + area.set_area(area.get_area() + icache.missb.local_result.area); + + icache.ifb.computeArea(); + icache.area.set_area(icache.area.get_area() + icache.ifb.local_result.area); + area.set_area(area.get_area() + icache.ifb.local_result.area); + + icache.prefetchb.computeArea(); + icache.area.set_area(icache.area.get_area() + + icache.prefetchb.local_result.area); + area.set_area(area.get_area() + icache.prefetchb.local_result.area); + + IB.computeArea(); + IB.area.set_area(IB.area.get_area() + IB.local_result.area); + area.set_area(area.get_area() + IB.local_result.area); + + if (coredynp.predictionW > 0) { + BPT.computeArea(); + area.set_area(area.get_area() + BPT.area.get_area()); + BTB.computeArea(); + BTB.area.set_area(BTB.area.get_area() + BTB.local_result.area); + area.set_area(area.get_area() + BTB.local_result.area); + } + + ID_misc.computeArea(); ID_operand.computeArea(); - ID_operand.computeDynamicPower(); ID_inst.computeArea(); - ID_inst.computeDynamicPower(); // TODO: X86 decoder should decode the inst in cyclic mode under the control // of squencer. So the dynamic power should be multiplied by a few times. 
@@ -410,9 +433,15 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, coredynp.decodeW); } -void InstFetchU::computeEnergy(bool is_tdp) { +void InstFetchU::computeDynamicPower(bool is_tdp) { + if (!exist) return; + if (!init_stats) { + std::cerr << "[ InstFetchU ] Error: must set params before calling " + "computeDynamicPower()\n"; + exit(1); + } if (is_tdp) { // init stats for Peak icache.caches.stats_t.readAc.access = @@ -559,6 +588,10 @@ void InstFetchU::computeEnergy(bool is_tdp) { // (icache.missb.local_result.power + // icache.ifb.local_result.power + // icache.prefetchb.local_result.power)*pppm_Isub; + + ID_misc.computeDynamicPower(); + ID_operand.computeDynamicPower(); + ID_inst.computeDynamicPower(); icache.power = icache.power_t + (icache.caches.local_result.power + icache.missb.local_result.power + diff --git a/src/core/instfetch.h b/src/core/instfetch.h index bb9b535..b34a255 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -61,14 +61,21 @@ class InstFetchU : public Component { inst_decoder ID_misc; bool exist; - InstFetchU(const ParseXML *XML_interface, + InstFetchU(){init_params = false; init_stats = false;}; + void set_stats(const ParseXML *XML_interface); + void set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, bool exsit = true); - void computeEnergy(bool is_tdp = true); + void computeArea(); + void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~InstFetchU(); + + private: + bool init_params; + bool init_stats; }; #endif // __INST_FETCH_U_H__ \ No newline at end of file From 3a9aee7575891f121965cd1532264fcecbc4954b Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 17:29:20 -0500 Subject: [PATCH 48/59] refactor: Load Store Unit Refactored Load Store Unit to work with serialization --- src/cacti/decoder.cc | 152 +++++++---- src/cacti/decoder.h | 36 ++- src/core/core.cc | 62 ++--- 
src/core/core.h | 2 +- src/core/exec_unit.cc | 44 ++- src/core/exec_unit.h | 13 +- src/core/instfetch.cc | 76 +++--- src/core/loadstore.cc | 326 +++++++++++++---------- src/core/loadstore.h | 25 +- src/core/renaming_unit.cc | 41 ++- src/core/scheduler.cc | 18 +- src/logic/dep_resource_conflict_check.cc | 16 +- src/logic/dep_resource_conflict_check.h | 6 +- src/logic/inst_decoder.cc | 70 ++--- src/logic/inst_decoder.h | 28 +- src/logic/selection_logic.cc | 13 +- src/logic/selection_logic.h | 14 +- unit_test/unit_test.py | 49 ++-- unit_test/unit_test.sh | 58 ++-- 19 files changed, 564 insertions(+), 485 deletions(-) diff --git a/src/cacti/decoder.cc b/src/cacti/decoder.cc index fda3fd9..1d069e4 100644 --- a/src/cacti/decoder.cc +++ b/src/cacti/decoder.cc @@ -54,7 +54,8 @@ Decoder::Decoder(int _num_dec_signals, R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), // power(), fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), - total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { + total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), + nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { w_dec_n[i] = 0; @@ -95,24 +96,32 @@ Decoder::Decoder(int _num_dec_signals, compute_area(); } - void Decoder::set_params(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area &cell_, - bool power_gating_, - int nodes_DSTN_) { - + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_, + int nodes_DSTN_) { + exist = false; C_ld_dec_out = _C_ld_dec_out; - R_wire_dec_out=_R_wire_dec_out; num_gates=0; num_gates_min=2; delay=0; - fully_assoc=fully_assoc_; is_dram=is_dram_; is_wl_tr=is_wl_tr_; - 
total_driver_nwidth=0; total_driver_pwidth=0; sleeptx=NULL; nodes_DSTN=nodes_DSTN_; power_gating=power_gating_; + R_wire_dec_out = _R_wire_dec_out; + num_gates = 0; + num_gates_min = 2; + delay = 0; + fully_assoc = fully_assoc_; + is_dram = is_dram_; + is_wl_tr = is_wl_tr_; + total_driver_nwidth = 0; + total_driver_pwidth = 0; + sleeptx = NULL; + nodes_DSTN = nodes_DSTN_; + power_gating = power_gating_; for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { w_dec_n[i] = 0; @@ -184,8 +193,8 @@ void Decoder::compute_widths() { } } -void Decoder::computeArea(){ - compute_widths(); +void Decoder::computeArea() { + compute_widths(); compute_area(); } void Decoder::compute_area() { @@ -371,16 +380,27 @@ PredecBlk::PredecBlk(int num_dec_signals, double R_wire_predec_blk_out_, int num_dec_per_predec, bool is_dram, - bool is_blk1){ + bool is_blk1) { dec = dec_; - exist= false; number_input_addr_bits= 0; - C_ld_predec_blk_out= 0; R_wire_predec_blk_out= 0; - branch_effort_nand2_gate_output= 1; branch_effort_nand3_gate_output= 1; - flag_two_unique_paths= false; flag_L2_gate= 0; number_inputs_L1_gate= 0; - number_gates_L1_nand2_path= 0; number_gates_L1_nand3_path= 0; - number_gates_L2= 0; min_number_gates_L1= 2; min_number_gates_L2= 2; - num_L1_active_nand2_path= 0; num_L1_active_nand3_path= 0; - delay_nand2_path= 0; delay_nand3_path= 0; is_dram_= is_dram; + exist = false; + number_input_addr_bits = 0; + C_ld_predec_blk_out = 0; + R_wire_predec_blk_out = 0; + branch_effort_nand2_gate_output = 1; + branch_effort_nand3_gate_output = 1; + flag_two_unique_paths = false; + flag_L2_gate = 0; + number_inputs_L1_gate = 0; + number_gates_L1_nand2_path = 0; + number_gates_L1_nand3_path = 0; + number_gates_L2 = 0; + min_number_gates_L1 = 2; + min_number_gates_L2 = 2; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 0; + delay_nand2_path = 0; + delay_nand3_path = 0; + is_dram_ = is_dram; int branch_effort_predec_out; double C_ld_dec_gate; @@ -434,21 +454,32 @@ 
PredecBlk::PredecBlk(int num_dec_signals, } void PredecBlk::set_params(int num_dec_signals, - Decoder *dec_, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out_, - int num_dec_per_predec, - bool is_dram, - bool is_blk1){ + Decoder *dec_, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out_, + int num_dec_per_predec, + bool is_dram, + bool is_blk1) { dec = dec_; - exist= false; number_input_addr_bits= 0; - C_ld_predec_blk_out= 0; R_wire_predec_blk_out= 0; - branch_effort_nand2_gate_output= 1; branch_effort_nand3_gate_output= 1; - flag_two_unique_paths= false; flag_L2_gate= 0; number_inputs_L1_gate= 0; - number_gates_L1_nand2_path= 0; number_gates_L1_nand3_path= 0; - number_gates_L2= 0; min_number_gates_L1= 2; min_number_gates_L2= 2; - num_L1_active_nand2_path= 0; num_L1_active_nand3_path= 0; - delay_nand2_path= 0; delay_nand3_path= 0; is_dram_= is_dram; + exist = false; + number_input_addr_bits = 0; + C_ld_predec_blk_out = 0; + R_wire_predec_blk_out = 0; + branch_effort_nand2_gate_output = 1; + branch_effort_nand3_gate_output = 1; + flag_two_unique_paths = false; + flag_L2_gate = 0; + number_inputs_L1_gate = 0; + number_gates_L1_nand2_path = 0; + number_gates_L1_nand3_path = 0; + number_gates_L2 = 0; + min_number_gates_L1 = 2; + min_number_gates_L2 = 2; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 0; + delay_nand2_path = 0; + delay_nand3_path = 0; + is_dram_ = is_dram; int branch_effort_predec_out; double C_ld_dec_gate; @@ -1175,17 +1206,28 @@ void PredecBlk::leakage_feedback(double temperature) { } } -void PredecBlkDrv::set_params(int way_select_, PredecBlk *blk_, bool is_dram){ - flag_driver_exists=0; number_gates_nand2_path=0; - number_gates_nand3_path=0; min_number_gates=2; - num_buffers_driving_1_nand2_load=0; num_buffers_driving_2_nand2_load=0; - num_buffers_driving_4_nand2_load=0; num_buffers_driving_2_nand3_load=0; - num_buffers_driving_8_nand3_load=0; num_buffers_nand3_path=0; - c_load_nand2_path_out=0; 
c_load_nand3_path_out=0; - r_load_nand2_path_out=0; r_load_nand3_path_out=0; delay_nand2_path=0; - delay_nand3_path=0;blk=blk_; - dec=blk->dec; is_dram_=is_dram; way_select=way_select_; - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { +void PredecBlkDrv::set_params(int way_select_, PredecBlk *blk_, bool is_dram) { + flag_driver_exists = 0; + number_gates_nand2_path = 0; + number_gates_nand3_path = 0; + min_number_gates = 2; + num_buffers_driving_1_nand2_load = 0; + num_buffers_driving_2_nand2_load = 0; + num_buffers_driving_4_nand2_load = 0; + num_buffers_driving_2_nand3_load = 0; + num_buffers_driving_8_nand3_load = 0; + num_buffers_nand3_path = 0; + c_load_nand2_path_out = 0; + c_load_nand3_path_out = 0; + r_load_nand2_path_out = 0; + r_load_nand3_path_out = 0; + delay_nand2_path = 0; + delay_nand3_path = 0; + blk = blk_; + dec = blk->dec; + is_dram_ = is_dram; + way_select = way_select_; + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_nand2_path_n[i] = 0; width_nand2_path_p[i] = 0; width_nand3_path_n[i] = 0; @@ -1514,8 +1556,11 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) { num_act_mats_hor_dir; } -void Predec::set_params(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_){ - blk1=drv1_->blk; blk2=drv2_->blk; drv1=drv1_; drv2=drv2_; +void Predec::set_params(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) { + blk1 = drv1_->blk; + blk2 = drv2_->blk; + drv1 = drv1_; + drv2 = drv2_; driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + @@ -1559,7 +1604,6 @@ void Predec::set_params(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_){ blk2->power_L2.readOp.gate_leakage; power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; - } Predec::Predec(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { diff --git a/src/cacti/decoder.h b/src/cacti/decoder.h index ff7193a..63847a6 100644 --- a/src/cacti/decoder.h +++ 
b/src/cacti/decoder.h @@ -43,8 +43,7 @@ using namespace std; class Decoder : public Component { public: - - Decoder(int _num_dec_signals, + Decoder(int _num_dec_signals, bool flag_way_select, double _C_ld_dec_out, double _R_wire_dec_out, @@ -56,15 +55,15 @@ class Decoder : public Component { int nodes_DSTN_ = 1); Decoder(){}; void set_params(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area &cell_, - bool power_gating_ = false, - int nodes_DSTN_ = 1); + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_ = false, + int nodes_DSTN_ = 1); bool exist; int num_in_signals; double C_ld_dec_out; @@ -87,7 +86,6 @@ class Decoder : public Component { int nodes_DSTN; bool power_gating; - void computeArea(); void compute_widths(); void compute_area(); @@ -113,12 +111,12 @@ class PredecBlk : public Component { bool is_dram_, bool is_blk1); void set_params(int num_dec_signals, - Decoder *dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); + Decoder *dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); Decoder *dec; bool exist; @@ -163,7 +161,6 @@ class PredecBlk : public Component { class PredecBlkDrv : public Component { public: - void set_params(int way_select_, PredecBlk *blk_, bool is_dram); PredecBlkDrv(){}; PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram); @@ -218,7 +215,6 @@ class PredecBlkDrv : public Component { class Predec : public Component { public: - Predec(){}; void set_params(PredecBlkDrv *drv1, PredecBlkDrv *drv2); Predec(PredecBlkDrv *drv1, PredecBlkDrv *drv2); diff --git a/src/core/core.cc b/src/core/core.cc index 1d042b5..dd87987 100644 --- a/src/core/core.cc +++ 
b/src/core/core.cc @@ -48,8 +48,7 @@ Core::Core(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - ifu(0), lsu(0), mmu(0), rnu(0), corepipe(0), undiffCore(0), - l2cache(0) { + ifu(0), mmu(0), rnu(0), corepipe(0), undiffCore(0), l2cache(0) { /* * initialize, compute and optimize individual components. */ @@ -72,13 +71,14 @@ Core::Core(const ParseXML *XML_interface, clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; ifu = new InstFetchU(XML, ithCore, &interface_ip, coredynp, exit_flag); - lsu = new LoadStoreU(XML, ithCore, &interface_ip, coredynp, exit_flag); + lsu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); + lsu.computeArea(); mmu = new MemManU(); mmu->set_params(XML, ithCore, &interface_ip, coredynp); mmu->computeArea(); mmu->set_stats(XML); exu.set_params( - XML, ithCore, &interface_ip, lsu->lsq_height, coredynp, exit_flag); + XML, ithCore, &interface_ip, lsu.lsq_height, coredynp, exit_flag); exu.computeArea(); exu.set_stats(XML); exu.computeStaticPower(); @@ -107,9 +107,9 @@ Core::Core(const ParseXML *XML_interface, ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + ifu->area.get_area()); } - if (lsu->exist) { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); + if (lsu.exist) { + lsu.area.set_area(lsu.area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu.area.get_area()); } if (exu.exist) { exu.area.set_area(exu.area.get_area() + pipeline_area_per_unit); @@ -156,7 +156,7 @@ void Core::computeEnergy(bool is_tdp) { double num_units = 4.0; if (is_tdp) { ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); + lsu.computePower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); @@ -193,16 +193,16 @@ void Core::computeEnergy(bool is_tdp) { // cout << "core = 
" << // power.readOp.dynamic*clockRate << " W" << endl; } - if (lsu->exist) { + if (lsu.exist) { set_pppm(pppm_t, coredynp.num_pipelines / num_units * coredynp.LSU_duty_cycle, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - lsu->power = lsu->power + corepipe->power * pppm_t; + lsu.power = lsu.power + corepipe->power * pppm_t; // cout << "LSU = " << - // lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; + // lsu.power.readOp.dynamic*clockRate << " W" << endl; + power = power + lsu.power; // cout << "core = " << // power.readOp.dynamic*clockRate << " W" << endl; } @@ -246,7 +246,7 @@ void Core::computeEnergy(bool is_tdp) { } else { ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); + lsu.computePower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); @@ -290,7 +290,7 @@ void Core::computeEnergy(bool is_tdp) { ifu->rt_power = ifu->rt_power + corepipe->power * pppm_t; rt_power = rt_power + ifu->rt_power; } - if (lsu->exist) { + if (lsu.exist) { if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.LSU_duty_cycle * XML->sys.total_cycles * @@ -305,8 +305,8 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - lsu->rt_power = lsu->rt_power + corepipe->power * pppm_t; - rt_power = rt_power + lsu->rt_power; + lsu.rt_power = lsu.rt_power + corepipe->power * pppm_t; + rt_power = rt_power + lsu.rt_power; } if (exu.exist) { if (XML->sys.homogeneous_cores == 1) { @@ -440,31 +440,31 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } } - if (lsu->exist) { + if (lsu.exist) { cout << indent_str << "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << lsu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " 
<< lsu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << lsu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? lsu->power.readOp.longer_channel_leakage - : lsu->power.readOp.leakage) + << (long_channel ? lsu.power.readOp.longer_channel_leakage + : lsu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? lsu->power.readOp.power_gated_with_long_channel_leakage - : lsu->power.readOp.power_gated_leakage) + ? lsu.power.readOp.power_gated_with_long_channel_leakage + : lsu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << lsu.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << lsu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << lsu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - lsu->displayEnergy(indent + 4, plevel, is_tdp); + lsu.display(indent + 4, plevel, is_tdp); } } if (mmu->exist) { @@ -555,12 +555,12 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next //<< "Load Store Unit Peak Dynamic = " << - // lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + // lsu.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Load Store Unit Subthreshold Leakage = " << - // lsu->rt_power.readOp.leakage << " W" << endl; cout << + // lsu.rt_power.readOp.leakage << " W" << endl; cout << // indent_str_next // << "Load Store Unit Gate Leakage = " << - // lsu->rt_power.readOp.gate_leakage + // lsu.rt_power.readOp.gate_leakage //<< " W" << endl; cout << indent_str_next << "Memory Management Unit // Peak Dynamic = " << 
mmu->rt_power.readOp.dynamic*clockRate << " W" << // endl; cout << indent_str_next << "Memory Management Unit Subthreshold @@ -584,10 +584,6 @@ Core ::~Core() { delete ifu; ifu = 0; } - if (lsu) { - delete lsu; - lsu = 0; - } if (rnu) { delete rnu; rnu = 0; diff --git a/src/core/core.h b/src/core/core.h index e47c82c..df4f2f7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -55,7 +55,7 @@ class Core : public Component { double clockRate, executionTime; double scktRatio, chip_PR_overhead, macro_PR_overhead; InstFetchU *ifu; - LoadStoreU *lsu; + LoadStoreU lsu; MemManU *mmu; EXECU exu; RENAMINGU *rnu; diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 547d77e..7c24705 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -42,17 +42,17 @@ #include #include -EXECU::EXECU(){ +EXECU::EXECU() { init_params = false; init_stats = false; } void EXECU::set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - double lsq_height_, - const CoreDynParam &dyn_p_, - bool exist_){ + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_) { XML = XML_interface; ithCore = ithCore_; @@ -70,13 +70,11 @@ void EXECU::set_params(const ParseXML *XML_interface, scheu.set_params(XML, ithCore, &interface_ip, coredynp); exeu.set_params(XML, ithCore, &interface_ip, coredynp, ALU); - if (coredynp.num_fpus > 0) { fp_u.set_params(XML, ithCore, &interface_ip, coredynp, FPU); } if (coredynp.num_muls > 0) { mul.set_params(XML, ithCore, &interface_ip, coredynp, MUL); - } /* * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; @@ -88,15 +86,16 @@ void EXECU::set_params(const ParseXML *XML_interface, init_params = true; } -void EXECU::computeStaticPower(){ - //Doing nothing as of now, everything seems to be hapening inside set area itself +void EXECU::computeStaticPower() { + // Doing nothing as of now, everything seems to be hapening inside set area + // 
itself } -void EXECU::set_stats(const ParseXML *XML){ - rfu.set_stats(XML); - scheu.set_stats(XML); - exeu.set_stats(XML); - if (coredynp.num_fpus > 0) { +void EXECU::set_stats(const ParseXML *XML) { + rfu.set_stats(XML); + scheu.set_stats(XML); + exeu.set_stats(XML); + if (coredynp.num_fpus > 0) { fp_u.set_stats(XML); } if (coredynp.num_muls > 0) { @@ -105,8 +104,8 @@ void EXECU::set_stats(const ParseXML *XML){ init_stats = true; } -void EXECU::computeArea(){ - if (!init_params) { +void EXECU::computeArea() { + if (!init_params) { std::cerr << "[ EXECU ] Error: must set params before calling " "computeArea()\n"; exit(1); @@ -117,10 +116,10 @@ void EXECU::computeArea(){ exeu.computeArea(); - //all of the below interconnects depend ont he stats being set - rfu.set_stats(XML); - scheu.set_stats(XML); - exeu.set_stats(XML); + // all of the below interconnects depend ont he stats being set + rfu.set_stats(XML); + scheu.set_stats(XML); + exeu.set_stats(XML); double fu_height = 0.0; area.set_area(area.get_area() + exeu.area.get_area() + rfu.area.get_area() + @@ -455,9 +454,8 @@ void EXECU::computeArea(){ area.set_area(area.get_area() + bypass.area.get_area()); } - void EXECU::computeDynamicPower(bool is_tdp) { - if (!init_params) { + if (!init_params) { std::cerr << "[ EXECU ] Error: must set params before calling " "computeStaticPower()\n"; exit(1); diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index 958f5d0..de63445 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -71,11 +71,11 @@ class EXECU : public Component { EXECU(); void set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - double lsq_height_, - const CoreDynParam &dyn_p_, - bool exist_ = true); + int ithCore_, + InputParameter *interface_ip_, + double lsq_height_, + const CoreDynParam &dyn_p_, + bool exist_ = true); void set_stats(const ParseXML *XML); void computeArea(); void computeStaticPower(); @@ -83,10 +83,9 @@ class EXECU : public Component 
{ void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~EXECU(){}; - private: +private: bool init_params; bool init_stats; - }; #endif // __EXEC_U_H__ diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index f482227..43934a7 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -48,8 +48,7 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, const CoreDynParam &dyn_p_, bool exist_) : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), - exist(exist_) { + coredynp(dyn_p_), exist(exist_) { if (!exist) return; int idx, tag, data, size, line, assoc, banks; @@ -288,10 +287,10 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; IB.set_params(&interface_ip, - "InstBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + "InstBuffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); IB.computeArea(); IB.area.set_area(IB.area.get_area() + IB.local_result.area); area.set_area(area.get_area() + IB.local_result.area); @@ -356,10 +355,10 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; BTB.set_params(&interface_ip, - "Branch Target Buffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + "Branch Target Buffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); BTB.computeArea(); BTB.area.set_area(BTB.area.get_area() + BTB.local_result.area); area.set_area(area.get_area() + BTB.local_result.area); @@ -372,36 +371,36 @@ InstFetchU::InstFetchU(const ParseXML *XML_interface, } ID_inst.set_params(is_default, - &interface_ip, - coredynp.opcode_length, - 1 /*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, - coredynp.core_ty); + &interface_ip, + coredynp.opcode_length, + 1 /*Decoder should not know how many by itself*/, + coredynp.x86, + Core_device, + coredynp.core_ty); 
ID_operand.set_params(is_default, - &interface_ip, - coredynp.arch_ireg_width, - 1, - coredynp.x86, - Core_device, - coredynp.core_ty); + &interface_ip, + coredynp.arch_ireg_width, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); ID_misc.set_params(is_default, - &interface_ip, - 8 /* Prefix field etc upto 14B*/, - 1, - coredynp.x86, - Core_device, - coredynp.core_ty); - + &interface_ip, + 8 /* Prefix field etc upto 14B*/, + 1, + coredynp.x86, + Core_device, + coredynp.core_ty); + ID_misc.computeArea(); ID_misc.computeDynamicPower(); ID_operand.computeArea(); ID_operand.computeDynamicPower(); ID_inst.computeArea(); ID_inst.computeDynamicPower(); - + // TODO: X86 decoder should decode the inst in cyclic mode under the control // of squencer. So the dynamic power should be multiplied by a few times. area.set_area(area.get_area() + @@ -603,13 +602,13 @@ void InstFetchU::computeEnergy(bool is_tdp) { ID_inst.rt_power.readOp.dynamic = ID_inst.power_t.readOp.dynamic * ID_inst.rtp_stats.readAc.access; - ID_operand.rt_power.readOp.dynamic = ID_operand.power_t.readOp.dynamic * - ID_operand.rtp_stats.readAc.access; + ID_operand.rt_power.readOp.dynamic = + ID_operand.power_t.readOp.dynamic * ID_operand.rtp_stats.readAc.access; ID_misc.rt_power.readOp.dynamic = ID_misc.power_t.readOp.dynamic * ID_misc.rtp_stats.readAc.access; - rt_power = rt_power + - (ID_inst.rt_power + ID_operand.rt_power + ID_misc.rt_power); + rt_power = + rt_power + (ID_inst.rt_power + ID_operand.rt_power + ID_misc.rt_power); } } @@ -697,8 +696,8 @@ void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } cout << indent_str << "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB.area.get_area() * 1e-6 - << " mm^2" << endl; + cout << indent_str_next << "Area = " << IB.area.get_area() * 1e-6 << " mm^2" + << endl; cout << indent_str_next << "Peak Dynamic = " << IB.power.readOp.dynamic * clockRate << " W" << endl; @@ -712,8 +711,8 @@ void 
InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { ? IB.power.readOp.power_gated_with_long_channel_leakage : IB.power.readOp.power_gated_leakage) << " W" << endl; - cout << indent_str_next - << "Gate Leakage = " << IB.power.readOp.gate_leakage << " W" << endl; + cout << indent_str_next << "Gate Leakage = " << IB.power.readOp.gate_leakage + << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " << IB.rt_power.readOp.dynamic / executionTime << " W" << endl; @@ -803,5 +802,4 @@ InstFetchU ::~InstFetchU() { if (!exist) return; - } \ No newline at end of file diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc index 12e3784..d01f44b 100644 --- a/src/core/loadstore.cc +++ b/src/core/loadstore.cc @@ -43,16 +43,30 @@ #include #include -LoadStoreU::LoadStoreU(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - coredynp(dyn_p_), LSQ(0), LoadQ(0), exist(exist_) { - if (!exist) +LoadStoreU::LoadStoreU() { init_params = false; } + +void LoadStoreU::set_params(const ParseXML *XML, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) { + this->XML = XML; + this->ithCore = ithCore_; + this->interface_ip = *interface_ip_; + this->coredynp = dyn_p_; + this->exist = exist_; + + if (!exist) { return; - int idx, tag, data, size, line, assoc, banks; + } + + int idx = 0; + int tag = 0; + int data = 0; + int size = 0; + int line = 0; + int assoc = 0; + int banks = 0; bool debug = false; int ldst_opcode = XML->sys.core[ithCore].opcode_width; // 16; @@ -110,11 +124,6 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.caches.computeArea(); - dcache.area.set_area(dcache.area.get_area() + - dcache.caches.local_result.area); - area.set_area(area.get_area() + dcache.caches.local_result.area); - // 
output_data_csv(dcache.caches.local_result); // dCache controllers // miss buffer @@ -151,10 +160,6 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.missb.computeArea(); - dcache.area.set_area(dcache.area.get_area() + dcache.missb.local_result.area); - area.set_area(area.get_area() + dcache.missb.local_result.area); - // output_data_csv(dcache.missb.local_result); // fill buffer tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; @@ -187,10 +192,6 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.ifb.computeArea(); - dcache.area.set_area(dcache.area.get_area() + dcache.ifb.local_result.area); - area.set_area(area.get_area() + dcache.ifb.local_result.area); - // output_data_csv(dcache.ifb.local_result); // prefetch buffer tag = XML->sys.physical_address_width + @@ -218,7 +219,6 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.obj_func_leak_power = 0; interface_ip.obj_func_cycle_t = 1; interface_ip.num_rw_ports = debug ? 
1 : XML->sys.core[ithCore].memory_ports; - ; interface_ip.num_rd_ports = 0; interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; @@ -227,14 +227,7 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.prefetchb.computeArea(); - dcache.area.set_area(dcache.area.get_area() + - dcache.prefetchb.local_result.area); - area.set_area(area.get_area() + dcache.prefetchb.local_result.area); - // output_data_csv(dcache.prefetchb.local_result); - // WBB - if (cache_p == Write_back) { tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; data = dcache.caches.l_ip.line_sz; @@ -266,10 +259,6 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.wbb.computeArea(); - dcache.area.set_area(dcache.area.get_area() + dcache.wbb.local_result.area); - area.set_area(area.get_area() + dcache.wbb.local_result.area); - // output_data_csv(dcache.wbb.local_result); } /* @@ -303,17 +292,11 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, - "Load(Store)Queue", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area() + LSQ->local_result.area); - area.set_area(area.get_area() + LSQ->local_result.area); - // output_data_csv(LSQ.LSQ.local_result); - lsq_height = - LSQ->local_result.cache_ht * - sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + LSQ.set_params(&interface_ip, + "Load(Store)Queue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { @@ -337,24 +320,81 @@ LoadStoreU::LoadStoreU(const ParseXML *XML_interface, interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; 
interface_ip.num_se_rd_ports = 0; interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, - "LoadQueue", - Core_device, - coredynp.opt_local, - coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area() + LoadQ->local_result.area); - area.set_area(area.get_area() + LoadQ->local_result.area); + LoadQ.set_params(&interface_ip, + "LoadQueue", + Core_device, + coredynp.opt_local, + coredynp.core_ty); + } + init_params = true; +} + +void LoadStoreU::computeArea() { + if (!init_params) { + std::cerr << "[ LoadStoreU ] Error: ComputeArea() must be called after " + "initializing params" + << std::endl; + exit(1); + } + + dcache.caches.computeArea(); + dcache.area.set_area(dcache.area.get_area() + + dcache.caches.local_result.area); + area.set_area(area.get_area() + dcache.caches.local_result.area); + + // dCache controllers + // miss buffer + dcache.missb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.missb.local_result.area); + area.set_area(area.get_area() + dcache.missb.local_result.area); + + // fill buffer + dcache.ifb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.ifb.local_result.area); + area.set_area(area.get_area() + dcache.ifb.local_result.area); + + dcache.prefetchb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + + dcache.prefetchb.local_result.area); + area.set_area(area.get_area() + dcache.prefetchb.local_result.area); + + if (cache_p == Write_back) { + dcache.wbb.computeArea(); + dcache.area.set_area(dcache.area.get_area() + dcache.wbb.local_result.area); + area.set_area(area.get_area() + dcache.wbb.local_result.area); + } + + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both + * loadQ and StoreQ + */ + LSQ.computeArea(); + LSQ.area.set_area(LSQ.area.get_area() + LSQ.local_result.area); + 
area.set_area(area.get_area() + LSQ.local_result.area); + // output_data_csv(LSQ.LSQ.local_result); + lsq_height = + LSQ.local_result.cache_ht * + sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ + + if ((coredynp.core_ty == OOO) && + (XML->sys.core[ithCore].load_buffer_size > 0)) { + LoadQ.computeArea(); + LoadQ.area.set_area(LoadQ.area.get_area() + LoadQ.local_result.area); + area.set_area(area.get_area() + LoadQ.local_result.area); // output_data_csv(LoadQ.LoadQ.local_result); lsq_height = - (LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht) * + (LSQ.local_result.cache_ht + LoadQ.local_result.cache_ht) * sqrt(cdb_overhead); /*XML->sys.core[ithCore].number_hardware_threads*/ } area.set_area(area.get_area() * cdb_overhead); } -void LoadStoreU::computeEnergy(bool is_tdp) { - if (!exist) +void LoadStoreU::computePower(bool is_tdp) { + if (!exist) { return; + } if (is_tdp) { // init stats for Peak dcache.caches.stats_t.readAc.access = @@ -392,14 +432,14 @@ void LoadStoreU::computeEnergy(bool is_tdp) { dcache.wbb.tdp_stats = dcache.wbb.stats_t; } - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = - LSQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; + LSQ.stats_t.readAc.access = LSQ.stats_t.writeAc.access = + LSQ.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LSQ.tdp_stats = LSQ.stats_t; if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = - LoadQ->l_ip.num_search_ports * coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; + LoadQ.stats_t.readAc.access = LoadQ.stats_t.writeAc.access = + LoadQ.l_ip.num_search_ports * coredynp.LSU_duty_cycle; + LoadQ.tdp_stats = LoadQ.stats_t; } } else { // init stats for Runtime Dynamic (RTP) @@ -451,26 +491,26 @@ void LoadStoreU::computeEnergy(bool is_tdp) { dcache.prefetchb.rtp_stats = dcache.prefetchb.stats_t; } - LSQ->stats_t.readAc.access = 
(XML->sys.core[ithCore].load_instructions + + LSQ.stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions) * + 2; // flush overhead considered + LSQ.stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions) * - 2; // flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + - XML->sys.core[ithCore].store_instructions) * - 2; - LSQ->rtp_stats = LSQ->stats_t; + 2; + LSQ.rtp_stats = LSQ.stats_t; if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + + LoadQ.stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + + XML->sys.core[ithCore].store_instructions; + LoadQ.stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + - XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; + LoadQ.rtp_stats = LoadQ.stats_t; } } dcache.power_t.reset(); - LSQ->power_t.reset(); + LSQ.power_t.reset(); dcache.power_t.readOp.dynamic += (dcache.caches.stats_t.readAc.hit * dcache.caches.local_result.power.readOp.dynamic + @@ -515,30 +555,30 @@ void LoadStoreU::computeEnergy(bool is_tdp) { if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += - LoadQ->stats_t.readAc.access * - (LoadQ->local_result.power.searchOp.dynamic + - LoadQ->local_result.power.readOp.dynamic) + - LoadQ->stats_t.writeAc.access * - LoadQ->local_result.power.writeOp + LoadQ.power_t.reset(); + LoadQ.power_t.readOp.dynamic += + LoadQ.stats_t.readAc.access * + (LoadQ.local_result.power.searchOp.dynamic + + LoadQ.local_result.power.readOp.dynamic) + + LoadQ.stats_t.writeAc.access * + LoadQ.local_result.power.writeOp 
.dynamic; // every memory access invloves at least two // operations on LoadQ - LSQ->power_t.readOp.dynamic += - LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + - LSQ->local_result.power.readOp.dynamic) + - LSQ->stats_t.writeAc.access * - LSQ->local_result.power.writeOp + LSQ.power_t.readOp.dynamic += + LSQ.stats_t.readAc.access * (LSQ.local_result.power.searchOp.dynamic + + LSQ.local_result.power.readOp.dynamic) + + LSQ.stats_t.writeAc.access * + LSQ.local_result.power.writeOp .dynamic; // every memory access invloves at least two // operations on LSQ } else { - LSQ->power_t.readOp.dynamic += - LSQ->stats_t.readAc.access * (LSQ->local_result.power.searchOp.dynamic + - LSQ->local_result.power.readOp.dynamic) + - LSQ->stats_t.writeAc.access * - LSQ->local_result.power.writeOp + LSQ.power_t.readOp.dynamic += + LSQ.stats_t.readAc.access * (LSQ.local_result.power.searchOp.dynamic + + LSQ.local_result.power.readOp.dynamic) + + LSQ.stats_t.writeAc.access * + LSQ.local_result.power.writeOp .dynamic; // every memory access invloves at least two // operations on LSQ } @@ -559,13 +599,13 @@ void LoadStoreU::computeEnergy(bool is_tdp) { dcache.power = dcache.power + dcache.wbb.local_result.power * pppm_lkg; } - LSQ->power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; - power = power + dcache.power + LSQ->power; + LSQ.power = LSQ.power_t + LSQ.local_result.power * pppm_lkg; + power = power + dcache.power + LSQ.power; if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; - power = power + LoadQ->power; + LoadQ.power = LoadQ.power_t + LoadQ.local_result.power * pppm_lkg; + power = power + LoadQ.power; } } else { // dcache.rt_power = dcache.power_t + @@ -585,18 +625,18 @@ void LoadStoreU::computeEnergy(bool is_tdp) { dcache.rt_power + dcache.wbb.local_result.power * pppm_lkg; } - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power * pppm_lkg; - 
rt_power = rt_power + dcache.rt_power + LSQ->rt_power; + LSQ.rt_power = LSQ.power_t + LSQ.local_result.power * pppm_lkg; + rt_power = rt_power + dcache.rt_power + LSQ.rt_power; if ((coredynp.core_ty == OOO) && (XML->sys.core[ithCore].load_buffer_size > 0)) { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power * pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; + LoadQ.rt_power = LoadQ.power_t + LoadQ.local_result.power * pppm_lkg; + rt_power = rt_power + LoadQ.rt_power; } } } -void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { +void LoadStoreU::display(uint32_t indent, int plevel, bool is_tdp) { if (!exist) return; string indent_str(indent, ' '); @@ -629,75 +669,76 @@ void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { cout << endl; if (coredynp.core_ty == Inorder) { cout << indent_str << "Load/Store Queue:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << LSQ.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << LSQ.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LSQ->power.readOp.longer_channel_leakage - : LSQ->power.readOp.leakage) + << (long_channel ? LSQ.power.readOp.longer_channel_leakage + : LSQ.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? LSQ->power.readOp.power_gated_with_long_channel_leakage - : LSQ->power.readOp.power_gated_leakage) + ? 
LSQ.power.readOp.power_gated_with_long_channel_leakage + : LSQ.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << LSQ.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + << LSQ.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } else { if (XML->sys.core[ithCore].load_buffer_size > 0) { cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << LoadQ.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << LoadQ->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << LoadQ.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LoadQ->power.readOp.longer_channel_leakage - : LoadQ->power.readOp.leakage) + << (long_channel ? LoadQ.power.readOp.longer_channel_leakage + : LoadQ.power.readOp.leakage) << " W" << endl; if (power_gating) - cout << indent_str_next << "Subthreshold Leakage with power gating = " - << (long_channel ? LoadQ->power.readOp - .power_gated_with_long_channel_leakage - : LoadQ->power.readOp.power_gated_leakage) - << " W" << endl; + cout + << indent_str_next << "Subthreshold Leakage with power gating = " + << (long_channel + ? 
LoadQ.power.readOp.power_gated_with_long_channel_leakage + : LoadQ.power.readOp.power_gated_leakage) + << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << LoadQ.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << LoadQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + << LoadQ.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } cout << indent_str << "StoreQ:" << endl; - cout << indent_str_next << "Area = " << LSQ->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << LSQ.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << LSQ->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << LSQ.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? LSQ->power.readOp.longer_channel_leakage - : LSQ->power.readOp.leakage) + << (long_channel ? LSQ.power.readOp.longer_channel_leakage + : LSQ.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? LSQ->power.readOp.power_gated_with_long_channel_leakage - : LSQ->power.readOp.power_gated_leakage) + ? 
LSQ.power.readOp.power_gated_with_long_channel_leakage + : LSQ.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << LSQ.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << LSQ->rt_power.readOp.dynamic / executionTime << " W" << endl; + << LSQ.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; } } else { @@ -709,41 +750,30 @@ void LoadStoreU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << dcache.rt_power.readOp.gate_leakage << " W" << endl; if (coredynp.core_ty == Inorder) { cout << indent_str_next << "Load/Store Queue Peak Dynamic = " - << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + << LSQ.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " - << LSQ->rt_power.readOp.leakage << " W" << endl; + << LSQ.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next << "Load/Store Queue Gate Leakage = " - << LSQ->rt_power.readOp.gate_leakage << " W" << endl; + << LSQ.rt_power.readOp.gate_leakage << " W" << endl; } else { cout << indent_str_next << "LoadQ Peak Dynamic = " - << LoadQ->rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "LoadQ Subthreshold Leakage = " - << LoadQ->rt_power.readOp.leakage << " W" << endl; + << LoadQ.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next - << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage + << "LoadQ Subthreshold Leakage = " << LoadQ.rt_power.readOp.leakage + << " W" << endl; + cout << indent_str_next + << "LoadQ Gate Leakage = " << LoadQ.rt_power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "StoreQ Peak Dynamic = " - << LSQ->rt_power.readOp.dynamic * clockRate << " W" << endl; + << LSQ.rt_power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next - 
<< "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage + << "StoreQ Subthreshold Leakage = " << LSQ.rt_power.readOp.leakage << " W" << endl; cout << indent_str_next - << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage + << "StoreQ Gate Leakage = " << LSQ.rt_power.readOp.gate_leakage << " W" << endl; } } } -LoadStoreU ::~LoadStoreU() { - - if (!exist) - return; - if (LSQ) { - delete LSQ; - LSQ = 0; - } - if (LoadQ) { - delete LoadQ; - LoadQ = 0; - } -} +LoadStoreU ::~LoadStoreU(){}; diff --git a/src/core/loadstore.h b/src/core/loadstore.h index 2e41567..37fd95b 100644 --- a/src/core/loadstore.h +++ b/src/core/loadstore.h @@ -53,19 +53,24 @@ class LoadStoreU : public Component { double macro_PR_overhead; double lsq_height; DataCache dcache; - ArrayST *LSQ; // it is actually the store queue but for inorder processors it - // serves as both loadQ and StoreQ - ArrayST *LoadQ; + ArrayST LSQ; // it is actually the store queue but for inorder processors it + // serves as both loadQ and StoreQ + ArrayST LoadQ; bool exist; - LoadStoreU(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true); - void computeEnergy(bool is_tdp = true); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); + LoadStoreU(); + void set_params(const ParseXML *XML, + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true); + void computeArea(); + void computePower(bool is_tdp = true); + void display(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~LoadStoreU(); + +private: + bool init_params; }; #endif // __LOAD_STORE_U_H__ diff --git a/src/core/renaming_unit.cc b/src/core/renaming_unit.cc index b3dfe24..52ad8c6 100644 --- a/src/core/renaming_unit.cc +++ b/src/core/renaming_unit.cc @@ -395,8 +395,7 @@ void RENAMINGU::set_params(const ParseXML *XML, &interface_ip, coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 
sections See TR - fdcl.set_params( - &interface_ip, coredynp, coredynp.phy_freg_width); + fdcl.set_params(&interface_ip, coredynp, coredynp.phy_freg_width); } else if (coredynp.scheu_ty == ReservationStation) { if (coredynp.rm_ty == RAMbased) { @@ -639,8 +638,7 @@ void RENAMINGU::set_params(const ParseXML *XML, &interface_ip, coredynp, coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl.set_params( - &interface_ip, coredynp, coredynp.phy_freg_width); + fdcl.set_params(&interface_ip, coredynp, coredynp.phy_freg_width); } } if (coredynp.core_ty == Inorder && coredynp.issueW > 1) { @@ -648,12 +646,10 @@ void RENAMINGU::set_params(const ParseXML *XML, * Multiple issue in order processor can do without renaming, but dcl is a * must. */ - idcl.set_params( - &interface_ip, - coredynp, - coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR - fdcl.set_params( - &interface_ip, coredynp, coredynp.phy_freg_width); + idcl.set_params(&interface_ip, + coredynp, + coredynp.phy_ireg_width); // TODO:Separate 2 sections See TR + fdcl.set_params(&interface_ip, coredynp, coredynp.phy_freg_width); } init_params = true; } @@ -1130,10 +1126,8 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { if (is_tdp) { if (coredynp.core_ty == OOO) { if (coredynp.scheu_ty == PhysicalRegFile) { - iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; - fFRAT.power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; + iFRAT.power = iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; + fFRAT.power = fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; ffreeL.power = ffreeL.power_t + ffreeL.local_result.power; power = power + @@ -1146,10 +1140,8 @@ void RENAMINGU::computeStaticPower(bool is_tdp) { power = power + (iRRAT.power + fRRAT.power); } } else if (coredynp.scheu_ty == ReservationStation) { - iFRAT.power = - iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; - 
fFRAT.power = - fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; + iFRAT.power = iFRAT.power_t + (iFRAT.local_result.power) + idcl.power_t; + fFRAT.power = fFRAT.power_t + (fFRAT.local_result.power) + fdcl.power_t; ifreeL.power = ifreeL.power_t + ifreeL.local_result.power; power = power + (iFRAT.power + fFRAT.power) + ifreeL.power; if ((coredynp.rm_ty == RAMbased) && (coredynp.globalCheckpoint < 1)) { @@ -1362,8 +1354,8 @@ void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { } else { cout << indent_str << "Int DCL:" << endl; cout << indent_str_next - << "Peak Dynamic = " << idcl.power.readOp.dynamic * clockRate - << " W" << endl; + << "Peak Dynamic = " << idcl.power.readOp.dynamic * clockRate << " W" + << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel ? idcl.power.readOp.longer_channel_leakage : idcl.power.readOp.leakage) @@ -1382,8 +1374,8 @@ void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { << idcl.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << indent_str << "FP DCL:" << endl; cout << indent_str_next - << "Peak Dynamic = " << fdcl.power.readOp.dynamic * clockRate - << " W" << endl; + << "Peak Dynamic = " << fdcl.power.readOp.dynamic * clockRate << " W" + << endl; cout << indent_str_next << "Subthreshold Leakage = " << (long_channel ? 
fdcl.power.readOp.longer_channel_leakage : fdcl.power.readOp.leakage) @@ -1453,8 +1445,9 @@ void RENAMINGU::display(uint32_t indent, int plevel, bool is_tdp) { << " W" << endl; cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl.rt_power.readOp.dynamic * clockRate << " W" << endl; - cout << indent_str_next << "FP DCL Subthreshold Leakage = " - << fdcl.rt_power.readOp.leakage << " W" << endl; + cout << indent_str_next + << "FP DCL Subthreshold Leakage = " << fdcl.rt_power.readOp.leakage + << " W" << endl; cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl.rt_power.readOp.gate_leakage << " W" << endl; diff --git a/src/core/scheduler.cc b/src/core/scheduler.cc index 12a8dc8..91e9ca8 100644 --- a/src/core/scheduler.cc +++ b/src/core/scheduler.cc @@ -370,12 +370,13 @@ void SchedulerU::set_params(const ParseXML *XML_interface, coredynp.opt_local, coredynp.core_ty); } - instruction_selection.set_params(is_default, - XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, - &interface_ip, - Core_device, - coredynp.core_ty); + instruction_selection.set_params( + is_default, + XML->sys.core[ithCore].instruction_window_size, + coredynp.peak_issueW, + &interface_ip, + Core_device, + coredynp.core_ty); } init_params = true; @@ -417,7 +418,6 @@ void SchedulerU::computeArea() { coredynp.num_pipelines); area.set_area(area.get_area() + int_inst_window.local_result.area * coredynp.num_pipelines); - } if (coredynp.core_ty == OOO) { @@ -823,6 +823,4 @@ void SchedulerU::computeDynamicPower(bool is_tdp) { // cout<<"selection"<opcode comparator - } - else { + } else { compare_bits += 16 + 8 + 8; } @@ -100,8 +100,8 @@ void dep_resource_conflict_check::set_params(const InputParameter *configure_int power.readOp.dynamic *= sckRation; power.writeOp.dynamic *= sckRation; power.searchOp.dynamic *= sckRation; - } - +} + void dep_resource_conflict_check::conflict_check_power() { double Ctotal; int num_comparators; diff --git 
a/src/logic/dep_resource_conflict_check.h b/src/logic/dep_resource_conflict_check.h index 7a12dd6..92e930a 100644 --- a/src/logic/dep_resource_conflict_check.h +++ b/src/logic/dep_resource_conflict_check.h @@ -67,9 +67,9 @@ class dep_resource_conflict_check : public Component { powerDef power_t; void set_params(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, - int compare_bits_, - bool _is_default = true); + const CoreDynParam &dyn_p_, + int compare_bits_, + bool _is_default = true); void conflict_check_power(); double compare_cap(); diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index a1edce6..d3a8810 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -32,13 +32,12 @@ #include "inst_decoder.h" void inst_decoder::set_params(bool _is_default, - const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_, - enum Core_type core_ty_) - { + const InputParameter *configure_interface, + int opcode_length_, + int num_decoders_, + bool x86_, + enum Device_ty device_ty_, + enum Core_type core_ty_) { /* * Instruction decoder is different from n to 2^n decoders * that are commonly used in row decoders in memory arrays. @@ -62,9 +61,12 @@ void inst_decoder::set_params(bool _is_default, * it involve both decoding instructions into u-ops and * merge u-ops when doing micro-ops fusion. 
*/ - is_default=_is_default; opcode_length=opcode_length_; - num_decoders=num_decoders_; x86=x86_; device_ty=device_ty_; - core_ty=core_ty_; + is_default = _is_default; + opcode_length = opcode_length_; + num_decoders = num_decoders_; + x86 = x86_; + device_ty = device_ty_; + core_ty = core_ty_; bool is_dram = false; double pmos_to_nmos_sizing_r; double load_nmos_width, load_pmos_width; @@ -90,31 +92,30 @@ void inst_decoder::set_params(bool _is_default, R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; final_dec.set_params(num_decoded_signals, - false, - C_driver_load, - R_wire_load, - false /*is_fa*/, - false /*is_dram*/, - false /*wl_tr*/, // to use peri device - cell); + false, + C_driver_load, + R_wire_load, + false /*is_fa*/, + false /*is_dram*/, + false /*wl_tr*/, // to use peri device + cell); final_dec.computeArea(); predec_blk1.set_params(num_decoded_signals, - &final_dec, - 0, // Assuming predec and dec are back to back - 0, - 1, // Each Predec only drives one final dec - false /*is_dram*/, - true); + &final_dec, + 0, // Assuming predec and dec are back to back + 0, + 1, // Each Predec only drives one final dec + false /*is_dram*/, + true); predec_blk2.set_params(num_decoded_signals, - &final_dec, - 0, // Assuming predec and dec are back to back - 0, - 1, // Each Predec only drives one final dec - false /*is_dram*/, - false); - + &final_dec, + 0, // Assuming predec and dec are back to back + 0, + 1, // Each Predec only drives one final dec + false /*is_dram*/, + false); predec_blk_drv1.set_params(0, &predec_blk1, false); @@ -124,14 +125,14 @@ void inst_decoder::set_params(bool _is_default, init_params = true; } -void inst_decoder::computeArea(){ - if (!init_params) { +void inst_decoder::computeArea() { + if (!init_params) { std::cerr << "[ Inst_decoder ] Error: must set params before calling " "computeArea()\n"; exit(1); } - double area_decoder = final_dec.area.get_area() * num_decoded_signals * + double area_decoder = 
final_dec.area.get_area() * num_decoded_signals * num_decoder_segments * num_decoders; // double w_decoder = area_decoder / area.get_h(); double area_pre_dec = @@ -144,7 +145,7 @@ void inst_decoder::computeArea(){ area.set_area(area.get_area() * macro_layout_overhead * chip_PR_overhead); } -void inst_decoder::computeDynamicPower(){ +void inst_decoder::computeDynamicPower() { inst_decoder_delay_power(); double sckRation = g_tp.sckt_co_eff; @@ -230,7 +231,6 @@ void inst_decoder::leakage_feedback(double temperature) { inst_decoder::~inst_decoder() { local_result.cleanup(); - delete pre_dec.blk1; delete pre_dec.blk2; delete pre_dec.drv1; diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index 282d5d4..daa770b 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -19,15 +19,14 @@ class inst_decoder : public Component { public: - void set_params(bool _is_default, - const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_ = Core_device, - enum Core_type core_ty_ = Inorder); - inst_decoder(){init_params = false;}; + const InputParameter *configure_interface, + int opcode_length_, + int num_decoders_, + bool x86_, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + inst_decoder() { init_params = false; }; bool is_default; int opcode_length; int num_decoders; @@ -41,10 +40,10 @@ class inst_decoder : public Component { Decoder final_dec; Predec pre_dec; - PredecBlk predec_blk1; - PredecBlk predec_blk2; - PredecBlkDrv predec_blk_drv1; - PredecBlkDrv predec_blk_drv2; + PredecBlk predec_blk1; + PredecBlk predec_blk2; + PredecBlkDrv predec_blk_drv1; + PredecBlkDrv predec_blk_drv2; statsDef tdp_stats; statsDef rtp_stats; statsDef stats_t; @@ -52,11 +51,12 @@ class inst_decoder : public Component { void computeArea(); void computeDynamicPower(); - + void inst_decoder_delay_power(); ~inst_decoder(); void leakage_feedback(double temperature); - 
private: + +private: bool init_params; }; diff --git a/src/logic/selection_logic.cc b/src/logic/selection_logic.cc index 33830e4..0278634 100644 --- a/src/logic/selection_logic.cc +++ b/src/logic/selection_logic.cc @@ -63,21 +63,25 @@ selection_logic::selection_logic(bool _is_default, power.readOp.power_gated_with_long_channel_leakage = power.readOp.power_gated_leakage * long_channel_device_reduction; } -void selection_logic::set_params(bool _is_default, int win_entries_, int issue_width_, const InputParameter *configure_interface, enum Device_ty device_ty_ , enum Core_type core_ty_ ) -{ +void selection_logic::set_params(bool _is_default, + int win_entries_, + int issue_width_, + const InputParameter *configure_interface, + enum Device_ty device_ty_, + enum Core_type core_ty_) { is_default = _is_default; win_entries = win_entries_; issue_width = issue_width_; device_ty = device_ty_; core_ty = core_ty_; - + l_ip = *configure_interface; local_result = init_interface(&l_ip); // init_tech_params(l_ip.F_sz_um, false); // win_entries=numIBEntries;//IQentries; // issue_width=issueWidth; - selection_power(); + selection_power(); double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; power.writeOp.dynamic *= sckRation; @@ -94,7 +98,6 @@ void selection_logic::set_params(bool _is_default, int win_entries_, int issue_w power.readOp.power_gated_leakage * long_channel_device_reduction; } - void selection_logic::selection_power() { // based on cost effective superscalar // processor TR pp27-31 double Ctotal, Cor, Cpencode; diff --git a/src/logic/selection_logic.h b/src/logic/selection_logic.h index 4bb4c1e..78fb2e1 100644 --- a/src/logic/selection_logic.h +++ b/src/logic/selection_logic.h @@ -49,7 +49,7 @@ class selection_logic : public Component { public: -selection_logic(){}; + selection_logic(){}; selection_logic( bool _is_default, int win_entries_, @@ -66,13 +66,13 @@ selection_logic(){}; int num_threads; enum Device_ty device_ty; enum Core_type core_ty; - + 
void set_params(bool _is_default, - int win_entries_, - int issue_width_, - const InputParameter *configure_interface, - enum Device_ty device_ty_ = Core_device, - enum Core_type core_ty_ = Inorder); + int win_entries_, + int issue_width_, + const InputParameter *configure_interface, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); void selection_power(); void leakage_feedback(double temperature); // TODO }; diff --git a/unit_test/unit_test.py b/unit_test/unit_test.py index d37ce80..d2e069d 100755 --- a/unit_test/unit_test.py +++ b/unit_test/unit_test.py @@ -41,17 +41,32 @@ kill_flag = [] parser = argparse.ArgumentParser() -parser.add_argument('--input', type=str, default="./input/basic_test_1", help="Test Input Path") -parser.add_argument("--output", type=str, default="./output/basic_test_1", help="Test Output Path") -parser.add_argument("--golden", type=str, default="./golden/basic_test_1", help="Test Golden Path") -parser.add_argument("--serial", type=bool, default=False, help="Serial if true, Basic if false") -parser.add_argument("--nthreads", type=int, default=1, help="Number of Threads to unit test with") +parser.add_argument( + '--input', type=str, default="./input/basic_test_1", help="Test Input Path") +parser.add_argument( + "--output", + type=str, + default="./output/basic_test_1", + help="Test Output Path") +parser.add_argument( + "--golden", + type=str, + default="./golden/basic_test_1", + help="Test Golden Path") +parser.add_argument( + "--serial", type=bool, default=False, help="Serial if true, Basic if false") +parser.add_argument( + "--nthreads", + type=int, + default=1, + help="Number of Threads to unit test with") args = parser.parse_args() input_path = args.input output_path = args.output golden_path = args.golden + def print_info(info, *args): if verbose: print("[ " + __file__ + " ] " + info + " " + @@ -156,7 +171,7 @@ def run_test_serializaiton_create(vector, i): ], stdout=so, stderr=se) - t = 
Timer(timeout_limit, kill, [p,i]) + t = Timer(timeout_limit, kill, [p, i]) t.start() p.wait() t.cancel() @@ -203,16 +218,18 @@ def run_test_serialization_restore(vector, sfile, i): return 1 return 0 + results = [] iteration = 0 + def worker_thread_normal(iq, tid): global results global iteration """ Worker Thread Normal expects just the test vector name and test numer in the queue [name, number] """ while not iq.empty(): - test = iq.get(); + test = iq.get() name = test[0] number = test[1] if run_test_normal(name, tid) == 0: @@ -220,6 +237,7 @@ def worker_thread_normal(iq, tid): else: results[number] = False + def worker_thread_serial(iq, tid): global results global iteration @@ -227,7 +245,7 @@ def worker_thread_serial(iq, tid): file name in the queue [name, number, sfile] """ global results while not iq.empty(): - test = iq.get(); + test = iq.get() name = test[0] number = test[1] sfile = test[2] @@ -248,16 +266,16 @@ def get_vectors(): f = 0 print_info(start) vectors = get_vectors() - results = [False]*len(vectors) + results = [False] * len(vectors) print_info("Found " + str(len(vectors)) + " test vectors") InputQueue = queue.Queue() threads = [] - kill_flag = [False]*args.nthreads + kill_flag = [False] * args.nthreads if not args.serial: # Prepare queue with inputs: - for vector,i in zip(vectors, range(len(vectors))): + for vector, i in zip(vectors, range(len(vectors))): InputQueue.put([vector, i]) # Create Threads: for i in range(args.nthreads): @@ -269,21 +287,22 @@ def get_vectors(): thr.join() else: # Create a Serialized File: - if(len(vectors) > 0): + if (len(vectors) > 0): if run_test_serializaiton_create(vectors[0], 0) == 0: # Prepare queue with inputs: - for vector,i in zip(vectors, range(len(vectors))): + for vector, i in zip(vectors, range(len(vectors))): InputQueue.put([vector, i, vectors[0]]) # Create Threads: for i in range(args.nthreads): - thr = threading.Thread(target=worker_thread_serial, args=[InputQueue, i]) + thr = threading.Thread( + 
target=worker_thread_serial, args=[InputQueue, i]) thr.start() threads.append(thr) # Join Threads: for thr in threads: thr.join() else: - print_info("No files in "+input_path) + print_info("No files in " + input_path) sys.exit(1) for i in results: if i: diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index 1089a01..fc2eb78 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -75,44 +75,44 @@ print_info "Launching Tests; NTHREADS=$NTHREADS" print_info "#########################################################" print_info "# Unit Test Basic 1 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/basic_test_1 \ -# --output=./output/basic_test_1 \ -# --golden=./golden/basic_test_1 \ -# --nthreads=$NTHREADS +./unit_test.py \ + --input=./input/basic_test_1 \ + --output=./output/basic_test_1 \ + --golden=./golden/basic_test_1 \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 1 #" print_info "#########################################################" -./unit_test.py \ - --input=./input/serialization_test_1 \ - --output=./output/serialization_test_1 \ - --golden=./golden/serialization_test_1 \ - --serial=True \ - --nthreads=$NTHREADS +#./unit_test.py \ +# --input=./input/serialization_test_1 \ +# --output=./output/serialization_test_1 \ +# --golden=./golden/serialization_test_1 \ +# --serial=True \ +# --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 2 #" print_info "#########################################################" -./unit_test.py \ - --input=./input/serialization_test_2 \ - --output=./output/serialization_test_2 \ - --golden=./golden/serialization_test_2 \ - --serial=True \ - --nthreads=$NTHREADS +#./unit_test.py \ +# --input=./input/serialization_test_2 \ +# --output=./output/serialization_test_2 \ +# 
--golden=./golden/serialization_test_2 \ +# --serial=True \ +# --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 3 #" print_info "#########################################################" -./unit_test.py \ - --input=./input/serialization_test_3 \ - --output=./output/serialization_test_3 \ - --golden=./golden/serialization_test_3 \ - --serial=True \ - --nthreads=$NTHREADS +#./unit_test.py \ +# --input=./input/serialization_test_3 \ +# --output=./output/serialization_test_3 \ +# --golden=./golden/serialization_test_3 \ +# --serial=True \ +# --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 4 #" print_info "#########################################################" -./unit_test.py \ - --input=./input/serialization_test_4 \ - --output=./output/serialization_test_4 \ - --golden=./golden/serialization_test_4 \ - --serial=True \ - --nthreads=$NTHREADS +#./unit_test.py \ +# --input=./input/serialization_test_4 \ +# --output=./output/serialization_test_4 \ +# --golden=./golden/serialization_test_4 \ +# --serial=True \ +# --nthreads=$NTHREADS From 3efd789ceca262b6c2ccba7deaed32f604d8e51c Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 18:10:29 -0500 Subject: [PATCH 49/59] Bugfix in merge --- src/array.cc | 8 ++++---- src/core/core.cc | 4 ++-- src/core/instfetch.cc | 6 +++--- unit_test/golden/basic_test_1/Alpha21364.golden | 10 ---------- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/array.cc b/src/array.cc index d154378..da98f73 100644 --- a/src/array.cc +++ b/src/array.cc @@ -238,12 +238,12 @@ void ArrayST::optimize_array() { // For array structures except CAM and FA, Give warning but still provide // a result with best timing found if (throughput_overflow == true) - cout << "Warning: " << name + std::cerr << "Warning: " << name << " array structure cannot satisfy throughput 
constraint." - << endl; + << std::endl; if (latency_overflow == true) - cout << "Warning: " << name - << " array structure cannot satisfy latency constraint." << endl; + std::cerr << "Warning: " << name + << " array structure cannot satisfy latency constraint." << std::endl; } // else diff --git a/src/core/core.cc b/src/core/core.cc index e0c325c..d55fddc 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -158,8 +158,8 @@ void Core::computeEnergy(bool is_tdp) { double rtp_pipeline_coe; double num_units = 4.0; if (is_tdp) { - lsu.computePower(is_tdp); ifu->computeDynamicPower(is_tdp); + lsu.computePower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); @@ -248,8 +248,8 @@ void Core::computeEnergy(bool is_tdp) { } } else { - lsu.computePower(is_tdp); ifu->computeDynamicPower(is_tdp); + lsu.computePower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 7762700..989b347 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -419,10 +419,10 @@ void InstFetchU::computeArea(){ area.set_area(area.get_area() + BTB.local_result.area); } - ID_misc.computeArea(); + ID_misc.computeArea(); ID_operand.computeArea(); ID_inst.computeArea(); - ID_inst.computeDynamicPower(); + //ID_inst.computeDynamicPower(); // TODO: X86 decoder should decode the inst in cyclic mode under the control @@ -836,4 +836,4 @@ InstFetchU ::~InstFetchU() { if (!exist) return; -} \ No newline at end of file +} diff --git a/unit_test/golden/basic_test_1/Alpha21364.golden b/unit_test/golden/basic_test_1/Alpha21364.golden index 566fee7..a945a37 100644 --- a/unit_test/golden/basic_test_1/Alpha21364.golden +++ b/unit_test/golden/basic_test_1/Alpha21364.golden @@ -1,15 +1,5 @@ McPAT (version 1.3 of Feb, 2015) is computing the target processor... -Warning: icache array structure cannot satisfy throughput constraint. -Warning: icache array structure cannot satisfy latency constraint. 
-Warning: Branch Target Buffer array structure cannot satisfy throughput constraint. -Warning: Branch Target Buffer array structure cannot satisfy latency constraint. -Warning: Global Predictor array structure cannot satisfy throughput constraint. -Warning: Global Predictor array structure cannot satisfy latency constraint. -Warning: Predictor Chooser array structure cannot satisfy throughput constraint. -Warning: Predictor Chooser array structure cannot satisfy latency constraint. -Warning: dcache array structure cannot satisfy throughput constraint. -Warning: dcache array structure cannot satisfy latency constraint. McPAT (version 1.3 of Feb, 2015) results (current print level is 5) ***************************************************************************************** From 9351d1461c828db2c1e6987b1b5b3f8c1de64b73 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 23 Jun 2020 20:19:06 -0500 Subject: [PATCH 50/59] Initiating changes in the undiff_core --- src/core/core.cc | 11 ++++++----- src/core/loadstore.cc | 2 +- src/core/loadstore.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index d55fddc..ef52cee 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -46,13 +46,14 @@ Core::Core(const ParseXML *XML_interface, int ithCore_, - InputParameter *interface_ip_) - : XML(XML_interface), ithCore(ithCore_), interface_ip(*interface_ip_), - ifu(0), mmu(0), rnu(0), corepipe(0), undiffCore(0), l2cache(0) { + InputParameter *interface_ip_){ /* * initialize, compute and optimize individual components. 
*/ + XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; + ifu=0; mmu=0; rnu=0; corepipe=0; undiffCore=0; l2cache=0; + bool exit_flag = true; double pipeline_area_per_unit; @@ -159,7 +160,7 @@ void Core::computeEnergy(bool is_tdp) { double num_units = 4.0; if (is_tdp) { ifu->computeDynamicPower(is_tdp); - lsu.computePower(is_tdp); + lsu.computeDynamicPower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); @@ -249,7 +250,7 @@ void Core::computeEnergy(bool is_tdp) { } else { ifu->computeDynamicPower(is_tdp); - lsu.computePower(is_tdp); + lsu.computeDynamicPower(is_tdp); mmu->computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc index d01f44b..e553086 100644 --- a/src/core/loadstore.cc +++ b/src/core/loadstore.cc @@ -391,7 +391,7 @@ void LoadStoreU::computeArea() { area.set_area(area.get_area() * cdb_overhead); } -void LoadStoreU::computePower(bool is_tdp) { +void LoadStoreU::computeDynamicPower(bool is_tdp) { if (!exist) { return; } diff --git a/src/core/loadstore.h b/src/core/loadstore.h index 37fd95b..da7fd9c 100644 --- a/src/core/loadstore.h +++ b/src/core/loadstore.h @@ -65,7 +65,7 @@ class LoadStoreU : public Component { const CoreDynParam &dyn_p_, bool exist_ = true); void computeArea(); - void computePower(bool is_tdp = true); + void computeDynamicPower(bool is_tdp = true); void display(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~LoadStoreU(); From 54e564a69bb4f2892c2b800985c7638d5557c604 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 23 Jun 2020 22:10:45 -0500 Subject: [PATCH 51/59] Completed the undiff core --- src/core/core.cc | 34 +++++++++++++++++----------------- src/core/core.h | 2 +- src/logic/undiff_core.cc | 30 +++++++++++++++++------------- src/logic/undiff_core.h | 8 ++++++-- 4 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index ef52cee..a55c12a 100644 --- 
a/src/core/core.cc +++ b/src/core/core.cc @@ -52,7 +52,7 @@ Core::Core(const ParseXML *XML_interface, */ XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; - ifu=0; mmu=0; rnu=0; corepipe=0; undiffCore=0; l2cache=0; + ifu=0; mmu=0; rnu=0; corepipe=0; l2cache=0; bool exit_flag = true; @@ -86,7 +86,11 @@ Core::Core(const ParseXML *XML_interface, exu.computeArea(); exu.set_stats(XML); exu.computeStaticPower(); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp, exit_flag); + undiffCore.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); + undiffCore.computeArea(); + // undiffCore.computeArea(); + // undiffCore.computeDynamicPower(); + if (coredynp.core_ty == OOO) { rnu = new RENAMINGU(); rnu->set_params(XML, ithCore, &interface_ip, coredynp); @@ -131,8 +135,8 @@ Core::Core(const ParseXML *XML_interface, } } - if (undiffCore->exist) { - area.set_area(area.get_area() + undiffCore->area.get_area()); + if (undiffCore.exist) { + area.set_area(area.get_area() + undiffCore.area.get_area()); } if (XML->sys.Private_L2) { @@ -238,7 +242,7 @@ void Core::computeEnergy(bool is_tdp) { // power.readOp.dynamic*clockRate << " W" << endl; } - power = power + undiffCore->power; + power = power + undiffCore.power; if (XML->sys.Private_L2) { @@ -348,7 +352,7 @@ void Core::computeEnergy(bool is_tdp) { rt_power = rt_power + mmu->rt_power; } - rt_power = rt_power + undiffCore->power; + rt_power = rt_power + undiffCore.power; // cout << "EXE = " << exu.power.readOp.dynamic*clockRate << " W" //<< endl; if (XML->sys.Private_L2) { @@ -521,25 +525,25 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } // if (plevel >2) // { - // if (undiffCore->exist) + // if (undiffCore.exist) // { // cout << indent_str << "Undifferentiated Core" << // endl; cout << indent_str_next << "Area = " << - // undiffCore->area.get_area()*1e-6<< " mm^2" << endl; cout + // undiffCore.area.get_area()*1e-6<< " mm^2" << endl; cout // << indent_str_next << "Peak 
Dynamic = " << - // undiffCore->power.readOp.dynamic*clockRate << " W" << endl; + // undiffCore.power.readOp.dynamic*clockRate << " W" << endl; //// cout << indent_str_next << "Subthreshold Leakage = " - ///<< undiffCore->power.readOp.leakage <<" W" << endl; + ///<< undiffCore.power.readOp.leakage <<" W" << endl; // cout << indent_str_next << "Subthreshold Leakage //= //" // << //(long_channel? - // undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) + // undiffCore.power.readOp.longer_channel_leakage:undiffCore.power.readOp.leakage) //<< " W" << endl; cout << indent_str_next << "Gate Leakage = " - //<< undiffCore->power.readOp.gate_leakage << " W" << endl; + //<< undiffCore.power.readOp.gate_leakage << " W" << endl; // // cout << indent_str_next << "Runtime Dynamic = " - //<< undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; + //<< undiffCore.rt_power.readOp.dynamic/executionTime << " W" << endl; // cout //<sys.Embedded), - pipeline_stage(coredynp.pipeline_stages), - num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW), - exist(exist_) // is_default(_is_default) -{ +{ + XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; + coredynp=dyn_p_; core_ty=coredynp.core_ty; embedded=XML->sys.Embedded; + pipeline_stage=coredynp.pipeline_stages; + num_hthreads=coredynp.num_hthreads; issue_width=coredynp.issueW; + exist=exist_; if (!exist) - return; - double undifferentiated_core = 0; + return; +} + + void UndiffCore::computeArea(){ + double undifferentiated_core = 0; double core_tx_density = 0; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); double undifferentiated_core_coe; // XML_interface=_XML_interface; uca_org_t result2; result2 = init_interface(&interface_ip); - - // Compute undifferentiated core area at 90nm. + // Compute undifferentiated core area at 90nm. 
if (embedded == false) { // Based on the results of polynomial/log curve fitting based on // undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, @@ -92,7 +94,7 @@ UndiffCore::UndiffCore(const ParseXML *XML_interface, // undifferentiated_core = 3*1e6; // undifferentiated_core *= // g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = undifferentiated_core * + power.readOp.leakage = undifferentiated_core * (core_tx_density)*cmos_Isub_leakage( 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, @@ -119,6 +121,7 @@ UndiffCore::UndiffCore(const ParseXML *XML_interface, area.set_area(undifferentiated_core); + scktRatio = g_tp.sckt_co_eff; power.readOp.dynamic *= scktRatio; power.writeOp.dynamic *= scktRatio; @@ -151,7 +154,8 @@ UndiffCore::UndiffCore(const ParseXML *XML_interface, // // std::cout< class UndiffCore : public Component { -public: - UndiffCore(const ParseXML *XML_interface, +public: + + UndiffCore(){}; + void set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_, const CoreDynParam &dyn_p_, @@ -65,6 +67,8 @@ class UndiffCore : public Component { bool opt_performance, embedded; double pipeline_stage, num_hthreads, issue_width; bool is_default; + void computeArea(); + void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~UndiffCore(){}; From 03fde9642f9f03b1f2d32b7ca34d10d58dfab43f Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Tue, 23 Jun 2020 22:38:57 -0500 Subject: [PATCH 52/59] Corepipe fixed --- src/core/core.cc | 38 ++++++++++++++++++-------------------- src/core/core.h | 2 +- src/logic/pipeline.cc | 13 ++++++------- src/logic/pipeline.h | 5 +++-- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index a55c12a..3145747 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -52,7 +52,7 @@ Core::Core(const ParseXML *XML_interface, */ 
XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; - ifu=0; mmu=0; rnu=0; corepipe=0; l2cache=0; + ifu=0; mmu=0; rnu=0; l2cache=0; bool exit_flag = true; @@ -97,20 +97,22 @@ Core::Core(const ParseXML *XML_interface, rnu->computeArea(); rnu->set_stats(XML); } - corepipe = new Pipeline(&interface_ip, coredynp); + corepipe.set_params(&interface_ip, coredynp); + corepipe.computeArea(); + if (coredynp.core_ty == OOO) { pipeline_area_per_unit = - (corepipe->area.get_area() * coredynp.num_pipelines) / 5.0; + (corepipe.area.get_area() * coredynp.num_pipelines) / 5.0; if (rnu->exist) { rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); } } else { pipeline_area_per_unit = - (corepipe->area.get_area() * coredynp.num_pipelines) / 4.0; + (corepipe.area.get_area() * coredynp.num_pipelines) / 4.0; } - // area.set_area(area.get_area()+ corepipe->area.get_area()); + // area.set_area(area.get_area()+ corepipe.area.get_area()); if (ifu->exist) { ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + ifu->area.get_area()); @@ -179,7 +181,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units); // User need to feed a duty cycle to improve accuracy if (rnu->exist) { - rnu->power = rnu->power + corepipe->power * pppm_t; + rnu->power = rnu->power + corepipe.power * pppm_t; power = power + rnu->power; } } @@ -192,11 +194,11 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units); // cout << "IFU = " << // ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power * pppm_t; + ifu->power = ifu->power + corepipe.power * pppm_t; // cout << "IFU = " << // ifu->power.readOp.dynamic*clockRate << " W" << endl; // cout << "1/4 pipe = " << - // corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; + // corepipe.power.readOp.dynamic*clockRate/num_units << " W" << endl; power = power + ifu->power; // cout << "core = " << // 
power.readOp.dynamic*clockRate << " W" << endl; @@ -207,7 +209,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - lsu.power = lsu.power + corepipe->power * pppm_t; + lsu.power = lsu.power + corepipe.power * pppm_t; // cout << "LSU = " << // lsu.power.readOp.dynamic*clockRate << " W" << endl; power = power + lsu.power; @@ -220,7 +222,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - exu.power = exu.power + corepipe->power * pppm_t; + exu.power = exu.power + corepipe.power * pppm_t; // cout << "EXE = " << // exu.power.readOp.dynamic*clockRate << " W" << endl; power = power + exu.power; @@ -234,7 +236,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - mmu->power = mmu->power + corepipe->power * pppm_t; + mmu->power = mmu->power + corepipe.power * pppm_t; // cout << "MMU = " << // mmu->power.readOp.dynamic*clockRate << " W" << endl; power = power + mmu->power; @@ -273,7 +275,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); if (rnu->exist) { - rnu->rt_power = rnu->rt_power + corepipe->power * pppm_t; + rnu->rt_power = rnu->rt_power + corepipe.power * pppm_t; rt_power = rt_power + rnu->rt_power; } @@ -295,7 +297,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - ifu->rt_power = ifu->rt_power + corepipe->power * pppm_t; + ifu->rt_power = ifu->rt_power + corepipe.power * pppm_t; rt_power = rt_power + ifu->rt_power; } if (lsu.exist) { @@ -313,7 +315,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - lsu.rt_power = 
lsu.rt_power + corepipe->power * pppm_t; + lsu.rt_power = lsu.rt_power + corepipe.power * pppm_t; rt_power = rt_power + lsu.rt_power; } if (exu.exist) { @@ -330,7 +332,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - exu.rt_power = exu.rt_power + corepipe->power * pppm_t; + exu.rt_power = exu.rt_power + corepipe.power * pppm_t; rt_power = rt_power + exu.rt_power; } if (mmu->exist) { @@ -348,7 +350,7 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - mmu->rt_power = mmu->rt_power + corepipe->power * pppm_t; + mmu->rt_power = mmu->rt_power + corepipe.power * pppm_t; rt_power = rt_power + mmu->rt_power; } @@ -601,10 +603,6 @@ Core ::~Core() { mmu = 0; } - if (corepipe) { - delete corepipe; - corepipe = 0; - } if (l2cache) { delete l2cache; l2cache = 0; diff --git a/src/core/core.h b/src/core/core.h index f41480f..ff83c1a 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -59,7 +59,7 @@ class Core : public Component { MemManU *mmu; EXECU exu; RENAMINGU *rnu; - Pipeline *corepipe; + Pipeline corepipe; UndiffCore undiffCore; SharedCache *l2cache; CoreDynParam coredynp; diff --git a/src/logic/pipeline.cc b/src/logic/pipeline.cc index 7d9a71a..9885672 100644 --- a/src/logic/pipeline.cc +++ b/src/logic/pipeline.cc @@ -33,16 +33,15 @@ #include "dff_cell.h" -Pipeline::Pipeline(const InputParameter *configure_interface, +void Pipeline::set_params(const InputParameter *configure_interface, const CoreDynParam &dyn_p_, enum Device_ty device_ty_, bool _is_core_pipeline, bool _is_default) - : l_ip(*configure_interface), coredynp(dyn_p_), device_ty(device_ty_), - is_core_pipeline(_is_core_pipeline), is_default(_is_default), - num_piperegs(0.0) - { + l_ip=*configure_interface; coredynp=dyn_p_; device_ty=device_ty_; + is_core_pipeline=_is_core_pipeline; 
is_default=_is_default; + num_piperegs=0.0; local_result = init_interface(&l_ip); if (!coredynp.Embedded) process_ind = true; @@ -58,10 +57,9 @@ Pipeline::Pipeline(const InputParameter *configure_interface, pmos_to_nmos_sz_ratio(); // this was 30 micron for the 0.8 // micron process load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); - compute(); } -void Pipeline::compute() { +void Pipeline::computeArea(){ compute_stage_vector(); DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); pipe_reg.compute_DFF_cell(); @@ -105,6 +103,7 @@ void Pipeline::compute() { area.set_area(area.get_area() * macro_layout_overhead); } + void Pipeline::compute_stage_vector() { double num_stages, tot_stage_vector, per_stage_vector; int opcode_length = diff --git a/src/logic/pipeline.h b/src/logic/pipeline.h index f6dd544..295dcdc 100644 --- a/src/logic/pipeline.h +++ b/src/logic/pipeline.h @@ -49,7 +49,8 @@ class Pipeline : public Component { public: - Pipeline(const InputParameter *configure_interface, + Pipeline(){}; + void set_params(const InputParameter *configure_interface, const CoreDynParam &dyn_p_, enum Device_ty device_ty_ = Core_device, bool _is_core_pipeline = true, @@ -70,8 +71,8 @@ class Pipeline : public Component { // commitWidth, instruction_length; int PC_width, opcode_length, // num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; bool // thread_clock_gated; bool in_order, multithreaded; + void computeArea(); void compute_stage_vector(); - void compute(); ~Pipeline() { local_result.cleanup(); }; }; From d3df757077c41dcd34f00247c0a4f1d72a4065ad Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 23 Jun 2020 23:05:48 -0500 Subject: [PATCH 53/59] refactor: cacti/decoder --- src/array.cc | 7 +- src/cacti/CMakeLists.txt | 7 +- src/cacti/decoder.cc | 1920 ----------------------- src/cacti/decoder.h | 279 ---- src/cacti/decoder/CMakeLists.txt | 17 + src/cacti/decoder/decoder.cc | 377 +++++ src/cacti/decoder/decoder.h | 135 ++ 
src/cacti/decoder/driver.cc | 171 ++ src/cacti/decoder/driver.h | 107 ++ src/cacti/decoder/predec.cc | 228 +++ src/cacti/decoder/predec.h | 81 + src/cacti/decoder/predec_blk.cc | 872 ++++++++++ src/cacti/decoder/predec_blk.h | 115 ++ src/cacti/decoder/predec_blk_drv.cc | 432 +++++ src/cacti/decoder/predec_blk_drv.h | 111 ++ src/cacti/mat.h | 4 + src/core/branch_predictor.h | 26 + src/core/exec_unit.h | 33 + src/core/instfetch.cc | 74 +- src/core/instfetch.h | 40 +- src/core/loadstore.h | 16 + src/core/mmu.h | 22 + src/core/regfile.h | 19 + src/core/renaming_unit.h | 25 + src/core/scheduler.h | 27 + src/logic/CMakeLists.txt | 2 +- src/logic/dep_resource_conflict_check.h | 31 +- src/logic/dff_cell.h | 27 + src/logic/inst_decoder.h | 32 +- src/logic/pipeline.h | 25 +- src/logic/selection_logic.h | 20 + 31 files changed, 3030 insertions(+), 2252 deletions(-) delete mode 100644 src/cacti/decoder.cc delete mode 100644 src/cacti/decoder.h create mode 100644 src/cacti/decoder/CMakeLists.txt create mode 100644 src/cacti/decoder/decoder.cc create mode 100644 src/cacti/decoder/decoder.h create mode 100644 src/cacti/decoder/driver.cc create mode 100644 src/cacti/decoder/driver.h create mode 100644 src/cacti/decoder/predec.cc create mode 100644 src/cacti/decoder/predec.h create mode 100644 src/cacti/decoder/predec_blk.cc create mode 100644 src/cacti/decoder/predec_blk.h create mode 100644 src/cacti/decoder/predec_blk_drv.cc create mode 100644 src/cacti/decoder/predec_blk_drv.h diff --git a/src/array.cc b/src/array.cc index da98f73..caaf15b 100644 --- a/src/array.cc +++ b/src/array.cc @@ -239,11 +239,12 @@ void ArrayST::optimize_array() { // a result with best timing found if (throughput_overflow == true) std::cerr << "Warning: " << name - << " array structure cannot satisfy throughput constraint." - << std::endl; + << " array structure cannot satisfy throughput constraint." 
+ << std::endl; if (latency_overflow == true) std::cerr << "Warning: " << name - << " array structure cannot satisfy latency constraint." << std::endl; + << " array structure cannot satisfy latency constraint." + << std::endl; } // else diff --git a/src/cacti/CMakeLists.txt b/src/cacti/CMakeLists.txt index d877551..22922f2 100644 --- a/src/cacti/CMakeLists.txt +++ b/src/cacti/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(decoder) + add_library(cacti Ucache.h arbiter.h @@ -8,7 +10,6 @@ add_library(cacti component.h const.h crossbar.h - decoder.h htree2.h io.h mat.h @@ -28,7 +29,6 @@ add_library(cacti cacti_interface.cc component.cc crossbar.cc - decoder.cc htree2.cc io.cc main.cc @@ -43,3 +43,6 @@ add_library(cacti wire.cc ) target_include_directories(cacti PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(cacti + LINK_PUBLIC + decoder) diff --git a/src/cacti/decoder.cc b/src/cacti/decoder.cc deleted file mode 100644 index 1d069e4..0000000 --- a/src/cacti/decoder.cc +++ /dev/null @@ -1,1920 +0,0 @@ -/***************************************************************************** - * McPAT/CACTI - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#include "decoder.h" - -#include "area.h" -#include "parameter.h" - -#include -#include -#include - -using namespace std; - -Decoder::Decoder(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area &cell_, - bool power_gating_, - int nodes_DSTN_) - : exist(false), C_ld_dec_out(_C_ld_dec_out), - R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), - // power(), - fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), - total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), - nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { - - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { - w_dec_n[i] = 0; - w_dec_p[i] = 0; - } - - /* - * _num_dec_signals is the number of decoded signal as output - * num_addr_bits_dec is the number of signal to be decoded - * as the decoders input. 
- */ - int num_addr_bits_dec = _log2(_num_dec_signals); - - if (num_addr_bits_dec < 4) { - if (flag_way_select) { - exist = true; - num_in_signals = 2; - } else { - num_in_signals = 0; - } - } else { - exist = true; - - if (flag_way_select) { - num_in_signals = 3; - } else { - num_in_signals = 2; - } - } - - assert(cell_.h > 0); - assert(cell_.w > 0); - // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; - // area.h = 4 * cell.h; - area.h = g_tp.h_dec * cell_.h; - height = cell_.h; - compute_widths(); - compute_area(); -} - -void Decoder::set_params(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area &cell_, - bool power_gating_, - int nodes_DSTN_) { - - exist = false; - C_ld_dec_out = _C_ld_dec_out; - - R_wire_dec_out = _R_wire_dec_out; - num_gates = 0; - num_gates_min = 2; - delay = 0; - fully_assoc = fully_assoc_; - is_dram = is_dram_; - is_wl_tr = is_wl_tr_; - total_driver_nwidth = 0; - total_driver_pwidth = 0; - sleeptx = NULL; - nodes_DSTN = nodes_DSTN_; - power_gating = power_gating_; - - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { - w_dec_n[i] = 0; - w_dec_p[i] = 0; - } - - /* - * _num_dec_signals is the number of decoded signal as output - * num_addr_bits_dec is the number of signal to be decoded - * as the decoders input. 
- */ - int num_addr_bits_dec = _log2(_num_dec_signals); - - if (num_addr_bits_dec < 4) { - if (flag_way_select) { - exist = true; - num_in_signals = 2; - } else { - num_in_signals = 0; - } - } else { - exist = true; - - if (flag_way_select) { - num_in_signals = 3; - } else { - num_in_signals = 2; - } - } - - assert(cell_.h > 0); - assert(cell_.w > 0); - // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; - // area.h = 4 * cell.h; - area.h = g_tp.h_dec * cell_.h; - - height = cell_.h; -} - -void Decoder::compute_widths() { - double F; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - - if (exist) { - if (num_in_signals == 2 || fully_assoc) { - w_dec_n[0] = 2 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2; - } else { - w_dec_n[0] = 3 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3; - } - - F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + - gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); - num_gates = logical_effort(num_gates_min, - num_in_signals == 2 ? 
gnand2 : gnand3, - F, - w_dec_n, - w_dec_p, - C_ld_dec_out, - p_to_n_sz_ratio, - is_dram, - is_wl_tr, - g_tp.max_w_nmos_dec); - } -} - -void Decoder::computeArea() { - compute_widths(); - compute_area(); -} -void Decoder::compute_area() { - double cumulative_area = 0; - double cumulative_curr = 0; // cumulative leakage current - double cumulative_curr_Ig = 0; // cumulative leakage current - - if (exist) { // First check if this decoder exists - if (num_in_signals == 2) { - cumulative_area = - compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = - cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); - cumulative_curr_Ig = - cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); - } else if (num_in_signals == 3) { - cumulative_area = - compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = - cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); - ; - cumulative_curr_Ig = - cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); - } - - for (int i = 1; i < num_gates; i++) { - cumulative_area += - compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); - cumulative_curr += - cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - cumulative_curr_Ig = - cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - } - power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage = - cumulative_curr * g_tp.peri_global.Vcc_min; - power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; - - area.w = (cumulative_area / area.h); - if (power_gating) { - compute_power_gating(); - cumulative_area += sleeptx->area.get_area(); - area.w = (cumulative_area / area.h); - } - } -} - -void Decoder::compute_power_gating() { - // For all driver chains there is only one sleep transistors to save area - // Total transistor width for sleep tx calculation - for (int i = 0; i < num_gates; i++) { - total_driver_nwidth += w_dec_n[i]; - total_driver_pwidth += 
w_dec_p[i]; - } - - // compute sleep tx - bool is_footer = false; - double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); - double detalV; - double c_wakeup; - - c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, height); // Psleep tx - detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; - // if (g_ip->power_gating) - sleeptx = new Sleep_tx(g_ip->perfloss, - Isat_subarray, - is_footer, - c_wakeup, - detalV, - nodes_DSTN, - area); -} - -double Decoder::compute_delays(double inrisetime) { - if (exist) { - double ret_val = 0; // outrisetime - int i; - double rd, tf, this_delay, c_load, c_intrinsic, Vpp; - double Vdd = g_tp.peri_global.Vdd; - - if ((is_wl_tr) && (is_dram)) { - Vpp = g_tp.vpp; - } else if (is_wl_tr) { - Vpp = g_tp.sram_cell.Vdd; - } else { - Vpp = g_tp.peri_global.Vdd; - } - - // first check whether a decoder is required at all - rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = - drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * - num_in_signals + - drain_C_(w_dec_n[0], - NCH, - num_in_signals, - 1, - area.h, - is_dram, - false, - is_wl_tr); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; - // cout<<"w_dec_n["<<0<<"] = "<blk; - blk2 = drv2_->blk; - drv1 = drv1_; - drv2 = drv2_; - - driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + - drv1->power_nand3_path.readOp.leakage + - drv2->power_nand2_path.readOp.leakage + - drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = - blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; - - 
driver_power.readOp.power_gated_leakage = - drv1->power_nand2_path.readOp.power_gated_leakage + - drv1->power_nand3_path.readOp.power_gated_leakage + - drv2->power_nand2_path.readOp.power_gated_leakage + - drv2->power_nand3_path.readOp.power_gated_leakage; - block_power.readOp.power_gated_leakage = - blk1->power_nand2_path.readOp.power_gated_leakage + - blk1->power_nand3_path.readOp.power_gated_leakage + - blk1->power_L2.readOp.power_gated_leakage + - blk2->power_nand2_path.readOp.power_gated_leakage + - blk2->power_nand3_path.readOp.power_gated_leakage + - blk2->power_L2.readOp.power_gated_leakage; - - power.readOp.leakage = - driver_power.readOp.leakage + block_power.readOp.leakage; - - power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + - block_power.readOp.power_gated_leakage; - - driver_power.readOp.gate_leakage = - drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; - block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = - driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; -} -Predec::Predec(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) - : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { - driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + - drv1->power_nand3_path.readOp.leakage + - drv2->power_nand2_path.readOp.leakage + - drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = - blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + 
blk2->power_L2.readOp.leakage; - - driver_power.readOp.power_gated_leakage = - drv1->power_nand2_path.readOp.power_gated_leakage + - drv1->power_nand3_path.readOp.power_gated_leakage + - drv2->power_nand2_path.readOp.power_gated_leakage + - drv2->power_nand3_path.readOp.power_gated_leakage; - block_power.readOp.power_gated_leakage = - blk1->power_nand2_path.readOp.power_gated_leakage + - blk1->power_nand3_path.readOp.power_gated_leakage + - blk1->power_L2.readOp.power_gated_leakage + - blk2->power_nand2_path.readOp.power_gated_leakage + - blk2->power_nand3_path.readOp.power_gated_leakage + - blk2->power_L2.readOp.power_gated_leakage; - - power.readOp.leakage = - driver_power.readOp.leakage + block_power.readOp.leakage; - - power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + - block_power.readOp.power_gated_leakage; - - driver_power.readOp.gate_leakage = - drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; - block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = - driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; -} - -void PredecBlkDrv::leakage_feedback(double temperature) { - double leak_nand2_path = 0; - double leak_nand3_path = 0; - double gate_leak_nand2_path = 0; - double gate_leak_nand3_path = 0; - - if (flag_driver_exists) { // first check whether a predecoder block driver is - // needed - for (int i = 0; i < number_gates_nand2_path; ++i) { - leak_nand2_path += cmos_Isub_leakage( - width_nand2_path_n[i], width_nand2_path_p[i], 1, inv, is_dram_); - gate_leak_nand2_path += cmos_Ig_leakage( - width_nand2_path_n[i], 
width_nand2_path_p[i], 1, inv, is_dram_); - } - leak_nand2_path *= - (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - gate_leak_nand2_path *= - (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - - for (int i = 0; i < number_gates_nand3_path; ++i) { - leak_nand3_path += cmos_Isub_leakage( - width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); - gate_leak_nand3_path += cmos_Ig_leakage( - width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); - } - leak_nand3_path *= - (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - gate_leak_nand3_path *= - (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - - power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; - power_nand2_path.readOp.gate_leakage = - gate_leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = - gate_leak_nand3_path * g_tp.peri_global.Vdd; - } -} - -double Predec::compute_delays(double inrisetime) { - // TODO: Jung Ho thinks that predecoder block driver locates between decoder - // and predecoder block. 
- pair tmp_pair1, tmp_pair2; - tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); - tmp_pair1 = blk1->compute_delays(tmp_pair1); - tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); - tmp_pair2 = blk2->compute_delays(tmp_pair2); - tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); - - driver_power.readOp.dynamic = - drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + - drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + - drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + - drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; - - block_power.readOp.dynamic = - blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + - blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + - blk1->power_L2.readOp.dynamic + - blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + - blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + - blk2->power_L2.readOp.dynamic; - - power.readOp.dynamic = - driver_power.readOp.dynamic + block_power.readOp.dynamic; - - delay = tmp_pair1.first; - return tmp_pair1.second; -} - -void Predec::leakage_feedback(double temperature) { - drv1->leakage_feedback(temperature); - drv2->leakage_feedback(temperature); - blk1->leakage_feedback(temperature); - blk2->leakage_feedback(temperature); - - driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + - drv1->power_nand3_path.readOp.leakage + - drv2->power_nand2_path.readOp.leakage + - drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = - blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; - power.readOp.leakage = - driver_power.readOp.leakage + block_power.readOp.leakage; - - driver_power.readOp.gate_leakage = - 
drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; - block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = - driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; -} - -// returns -pair -Predec::get_max_delay_before_decoder(pair input_pair1, - pair input_pair2) { - pair ret_val; - double delay; - - delay = drv1->delay_nand2_path + blk1->delay_nand2_path; - ret_val.first = delay; - ret_val.second = input_pair1.first; - delay = drv1->delay_nand3_path + blk1->delay_nand3_path; - if (ret_val.first < delay) { - ret_val.first = delay; - ret_val.second = input_pair1.second; - } - delay = drv2->delay_nand2_path + blk2->delay_nand2_path; - if (ret_val.first < delay) { - ret_val.first = delay; - ret_val.second = input_pair2.first; - } - delay = drv2->delay_nand3_path + blk2->delay_nand3_path; - if (ret_val.first < delay) { - ret_val.first = delay; - ret_val.second = input_pair2.second; - } - - return ret_val; -} - -Driver::Driver(double c_gate_load_, - double c_wire_load_, - double r_wire_load_, - bool is_dram, - bool power_gating_, - int nodes_DSTN_) - : number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), - c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), - // power(), - is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0), - sleeptx(NULL), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { - width_n[i] = 0; - width_p[i] = 0; - } - - compute_widths(); - compute_area(); -} - -void Driver::compute_widths() { - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); 
- double c_load = c_gate_load + c_wire_load; - width_n[0] = g_tp.min_w_nmos_; - width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - - double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); - number_gates = logical_effort(min_number_gates, - 1, - F, - width_n, - width_p, - c_load, - p_to_n_sz_ratio, - is_dram_, - false, - g_tp.max_w_nmos_); -} - -void Driver::compute_area() { - double cumulative_area = 0; - - area.h = g_tp.cell_h_def; - for (int i = 0; i < number_gates; i++) { - cumulative_area += - compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); - } - area.w = (cumulative_area / area.h); - if (power_gating) { - compute_power_gating(); - cumulative_area += sleeptx->area.get_area(); - area.w = (cumulative_area / area.h); - } -} - -void Driver::compute_power_gating() { - // For all driver chains there is only one sleep transistors to save area - // Total transistor width for sleep tx calculation - for (int i = 0; i < number_gates; i++) { - total_driver_nwidth += width_n[i]; - total_driver_pwidth += width_p[i]; - } - - // compute sleep tx - bool is_footer = false; - double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); - double detalV; - double c_wakeup; - - c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h); // Psleep tx - detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; - // if (g_ip->power_gating) - sleeptx = new Sleep_tx(g_ip->perfloss, - Isat_subarray, - is_footer, - c_wakeup, - detalV, - nodes_DSTN, // default is 1 for drivers - area); -} - -double Driver::compute_delay(double inrisetime) { - int i; - double rd, c_load, c_intrinsic, tf; - double this_delay = 0; - - for (i = 0; i < number_gates - 1; ++i) { - rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_load = gate_C(width_n[i + 1] + width_p[i + 1], 0.0, is_dram_); - c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = 
horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += - (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += - cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage += - cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vcc_min; - power.readOp.gate_leakage += - cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vdd; - } - - i = number_gates - 1; - c_load = c_gate_load + c_wire_load; - rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + - r_wire_load * (c_wire_load / 2 + c_gate_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - power.readOp.dynamic += - (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += - cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vdd; - power.readOp.power_gated_leakage += - cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vcc_min; - power.readOp.gate_leakage += - cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * - g_tp.peri_global.Vdd; - - return this_delay / (1.0 - 0.5); -} - -// TODO: add sleep tx in predec/predecblk/predecdriver diff --git a/src/cacti/decoder.h b/src/cacti/decoder.h deleted file mode 100644 index 63847a6..0000000 --- a/src/cacti/decoder.h +++ /dev/null @@ -1,279 +0,0 @@ -/***************************************************************************** - * McPAT/CACTI - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#ifndef __DECODER_H__ -#define __DECODER_H__ - -#include "area.h" -#include "component.h" -#include "parameter.h" -#include "powergating.h" - -#include - -using namespace std; - -class Decoder : public Component { -public: - Decoder(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool 
is_wl_tr_, - const Area &cell_, - bool power_gating_ = false, - int nodes_DSTN_ = 1); - Decoder(){}; - void set_params(int _num_dec_signals, - bool flag_way_select, - double _C_ld_dec_out, - double _R_wire_dec_out, - bool fully_assoc_, - bool is_dram_, - bool is_wl_tr_, - const Area &cell_, - bool power_gating_ = false, - int nodes_DSTN_ = 1); - bool exist; - int num_in_signals; - double C_ld_dec_out; - double R_wire_dec_out; - int num_gates; - int num_gates_min; - double w_dec_n[MAX_NUMBER_GATES_STAGE]; - double w_dec_p[MAX_NUMBER_GATES_STAGE]; - double delay; - // powerDef power; - bool fully_assoc; - bool is_dram; - bool is_wl_tr; - - double height; - double total_driver_nwidth; - double total_driver_pwidth; - Sleep_tx *sleeptx; - - int nodes_DSTN; - bool power_gating; - - void computeArea(); - void compute_widths(); - void compute_area(); - double compute_delays(double inrisetime); // return outrisetime - void compute_power_gating(); - - void leakage_feedback(double temperature); - - ~Decoder() { - if (sleeptx != 0) - delete sleeptx; - }; -}; - -class PredecBlk : public Component { -public: - PredecBlk(){}; - PredecBlk(int num_dec_signals, - Decoder *dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); - void set_params(int num_dec_signals, - Decoder *dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); - - Decoder *dec; - bool exist; - int number_input_addr_bits; - double C_ld_predec_blk_out; - double R_wire_predec_blk_out; - int branch_effort_nand2_gate_output; - int branch_effort_nand3_gate_output; - bool flag_two_unique_paths; - int flag_L2_gate; - int number_inputs_L1_gate; - int number_gates_L1_nand2_path; - int number_gates_L1_nand3_path; - int number_gates_L2; - int min_number_gates_L1; - int min_number_gates_L2; - int num_L1_active_nand2_path; - int num_L1_active_nand3_path; - double 
w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; - double w_L2_n[MAX_NUMBER_GATES_STAGE]; - double w_L2_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; - powerDef power_L2; - - bool is_dram_; - - void compute_widths(); - void compute_area(); - - void leakage_feedback(double temperature); - - pair - compute_delays(pair inrisetime); // - // return -}; - -class PredecBlkDrv : public Component { -public: - void set_params(int way_select_, PredecBlk *blk_, bool is_dram); - PredecBlkDrv(){}; - PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram); - - int flag_driver_exists; - int number_input_addr_bits; - int number_gates_nand2_path; - int number_gates_nand3_path; - int min_number_gates; - int num_buffers_driving_1_nand2_load; - int num_buffers_driving_2_nand2_load; - int num_buffers_driving_4_nand2_load; - int num_buffers_driving_2_nand3_load; - int num_buffers_driving_8_nand3_load; - int num_buffers_nand3_path; - double c_load_nand2_path_out; - double c_load_nand3_path_out; - double r_load_nand2_path_out; - double r_load_nand3_path_out; - double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; - - PredecBlk *blk; - Decoder *dec; - bool is_dram_; - int way_select; - - void compute_widths(); - void compute_area(); - - void leakage_feedback(double temperature); - - pair compute_delays( - double inrisetime_nand2_path, - double inrisetime_nand3_path); // return - - inline int num_addr_bits_nand2_path() { - return num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + - 
num_buffers_driving_4_nand2_load; - } - inline int num_addr_bits_nand3_path() { - return num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load; - } - double get_rdOp_dynamic_E(int num_act_mats_hor_dir); -}; - -class Predec : public Component { -public: - Predec(){}; - void set_params(PredecBlkDrv *drv1, PredecBlkDrv *drv2); - Predec(PredecBlkDrv *drv1, PredecBlkDrv *drv2); - - double compute_delays(double inrisetime); // return outrisetime - - void leakage_feedback(double temperature); - PredecBlk *blk1; - PredecBlk *blk2; - PredecBlkDrv *drv1; - PredecBlkDrv *drv2; - - powerDef block_power; - powerDef driver_power; - -private: - // returns - pair - get_max_delay_before_decoder(pair input_pair1, - pair input_pair2); -}; - -class Driver : public Component { -public: - Driver(double c_gate_load_, - double c_wire_load_, - double r_wire_load_, - bool is_dram, - bool power_gating_ = false, - int nodes_DSTN_ = 1); - - int number_gates; - int min_number_gates; - double width_n[MAX_NUMBER_GATES_STAGE]; - double width_p[MAX_NUMBER_GATES_STAGE]; - double c_gate_load; - double c_wire_load; - double r_wire_load; - double delay; - // powerDef power; - bool is_dram_; - - double total_driver_nwidth; - double total_driver_pwidth; - Sleep_tx *sleeptx; - - int nodes_DSTN; - bool power_gating; - - void compute_widths(); - void compute_area(); - double compute_delay(double inrisetime); - - void compute_power_gating(); - - ~Driver() { - if (sleeptx != 0) - delete sleeptx; - }; -}; - -#endif diff --git a/src/cacti/decoder/CMakeLists.txt b/src/cacti/decoder/CMakeLists.txt new file mode 100644 index 0000000..62e132f --- /dev/null +++ b/src/cacti/decoder/CMakeLists.txt @@ -0,0 +1,17 @@ +add_library(decoder + decoder.h + decoder.cc + driver.h + driver.cc + predec.h + predec.cc + predec_blk.h + predec_blk.cc + predec_blk_drv.h + predec_blk_drv.cc +) +target_include_directories(decoder PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(decoder + LINK_PUBLIC + cacti + 
top) diff --git a/src/cacti/decoder/decoder.cc b/src/cacti/decoder/decoder.cc new file mode 100644 index 0000000..a64ba66 --- /dev/null +++ b/src/cacti/decoder/decoder.cc @@ -0,0 +1,377 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "decoder.h" + +#include "area.h" +#include "parameter.h" + +#include +#include +#include + +using namespace std; + +Decoder::Decoder(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_, + int nodes_DSTN_) + : exist(false), C_ld_dec_out(_C_ld_dec_out), + R_wire_dec_out(_R_wire_dec_out), num_gates(0), num_gates_min(2), delay(0), + // power(), + fully_assoc(fully_assoc_), is_dram(is_dram_), is_wl_tr(is_wl_tr_), + total_driver_nwidth(0), total_driver_pwidth(0), sleeptx(NULL), + nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + w_dec_n[i] = 0; + w_dec_p[i] = 0; + } + + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. 
+ */ + int num_addr_bits_dec = _log2(_num_dec_signals); + + if (num_addr_bits_dec < 4) { + if (flag_way_select) { + exist = true; + num_in_signals = 2; + } else { + num_in_signals = 0; + } + } else { + exist = true; + + if (flag_way_select) { + num_in_signals = 3; + } else { + num_in_signals = 2; + } + } + + assert(cell_.h > 0); + assert(cell_.w > 0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + // area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell_.h; + height = cell_.h; + compute_widths(); + compute_area(); +} + +void Decoder::set_params(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_, + int nodes_DSTN_) { + + exist = false; + C_ld_dec_out = _C_ld_dec_out; + + R_wire_dec_out = _R_wire_dec_out; + num_gates = 0; + num_gates_min = 2; + delay = 0; + fully_assoc = fully_assoc_; + is_dram = is_dram_; + is_wl_tr = is_wl_tr_; + total_driver_nwidth = 0; + total_driver_pwidth = 0; + sleeptx = NULL; + nodes_DSTN = nodes_DSTN_; + power_gating = power_gating_; + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + w_dec_n[i] = 0; + w_dec_p[i] = 0; + } + + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. 
+ */ + int num_addr_bits_dec = _log2(_num_dec_signals); + + if (num_addr_bits_dec < 4) { + if (flag_way_select) { + exist = true; + num_in_signals = 2; + } else { + num_in_signals = 0; + } + } else { + exist = true; + + if (flag_way_select) { + num_in_signals = 3; + } else { + num_in_signals = 2; + } + } + + assert(cell_.h > 0); + assert(cell_.w > 0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + // area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell_.h; + + height = cell_.h; +} + +void Decoder::compute_widths() { + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + + if (exist) { + if (num_in_signals == 2 || fully_assoc) { + w_dec_n[0] = 2 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2; + } else { + w_dec_n[0] = 3 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3; + } + + F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); + num_gates = logical_effort(num_gates_min, + num_in_signals == 2 ? 
gnand2 : gnand3, + F, + w_dec_n, + w_dec_p, + C_ld_dec_out, + p_to_n_sz_ratio, + is_dram, + is_wl_tr, + g_tp.max_w_nmos_dec); + } +} + +void Decoder::computeArea() { + compute_widths(); + compute_area(); +} +void Decoder::compute_area() { + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative gate leakage current + + if (exist) { // First check if this decoder exists + if (num_in_signals == 2) { + cumulative_area = + compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + } else if (num_in_signals == 3) { + cumulative_area = + compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + ; + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + } + + for (int i = 1; i < num_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); + cumulative_curr += + cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + cumulative_curr_Ig += + cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); // accumulate per stage, do not overwrite + } + power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; + power.readOp.power_gated_leakage = + cumulative_curr * g_tp.peri_global.Vcc_min; + power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; + + area.w = (cumulative_area / area.h); + if (power_gating) { + compute_power_gating(); + cumulative_area += sleeptx->area.get_area(); + area.w = (cumulative_area / area.h); + } + } +} + +void Decoder::compute_power_gating() { + // For all driver chains there is only one sleep transistors to save area + // Total transistor width for sleep tx calculation + for (int i = 0; i < num_gates; i++) { + total_driver_nwidth += w_dec_n[i]; + total_driver_pwidth +=
w_dec_p[i]; + } + + // compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, height); // Psleep tx + detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; + // if (g_ip->power_gating) + sleeptx = new Sleep_tx(g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + nodes_DSTN, + area); +} + +double Decoder::compute_delays(double inrisetime) { + if (exist) { + double ret_val = 0; // outrisetime + int i; + double rd, tf, this_delay, c_load, c_intrinsic, Vpp; + double Vdd = g_tp.peri_global.Vdd; + + if ((is_wl_tr) && (is_dram)) { + Vpp = g_tp.vpp; + } else if (is_wl_tr) { + Vpp = g_tp.sram_cell.Vdd; + } else { + Vpp = g_tp.peri_global.Vdd; + } + + // first check whether a decoder is required at all + rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = + drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * + num_in_signals + + drain_C_(w_dec_n[0], + NCH, + num_in_signals, + 1, + area.h, + is_dram, + false, + is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + // cout<<"w_dec_n["<<0<<"] = "< +#include +#include +#include +#include + +using namespace std; + +class Decoder : public Component { +public: + Decoder(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area &cell_, + bool power_gating_ = false, + int nodes_DSTN_ = 1); + Decoder(){}; + void set_params(int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool 
is_wl_tr_, + const Area &cell_, + bool power_gating_ = false, + int nodes_DSTN_ = 1); + bool exist; + int num_in_signals; + double C_ld_dec_out; + double R_wire_dec_out; + int num_gates; + int num_gates_min; + double w_dec_n[MAX_NUMBER_GATES_STAGE]; + double w_dec_p[MAX_NUMBER_GATES_STAGE]; + double delay; + // powerDef power; + bool fully_assoc; + bool is_dram; + bool is_wl_tr; + + double height; + double total_driver_nwidth; + double total_driver_pwidth; + Sleep_tx *sleeptx; + + int nodes_DSTN; + bool power_gating; + + void computeArea(); + void compute_widths(); + void compute_area(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_gating(); + + void leakage_feedback(double temperature); + + ~Decoder() { + if (sleeptx != 0) + delete sleeptx; + }; + +private: + // Serialization + friend class boost::serialization::access; + + template <class Archive> + void serialize(Archive &ar, const unsigned int version) { + ar &exist; + ar &num_in_signals; + ar &C_ld_dec_out; + ar &R_wire_dec_out; + ar &num_gates; + ar &num_gates_min; + ar &w_dec_n; + ar &w_dec_p; + ar &delay; + ar &fully_assoc; + ar &is_dram; + ar &is_wl_tr; + + ar &height; + ar &total_driver_nwidth; + ar &total_driver_pwidth; + + ar &nodes_DSTN; + ar &power_gating; + Component::serialize(ar, version); + } +}; + +#endif diff --git a/src/cacti/decoder/driver.cc b/src/cacti/decoder/driver.cc new file mode 100644 index 0000000..89f5135 --- /dev/null +++ b/src/cacti/decoder/driver.cc @@ -0,0 +1,171 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "driver.h" + +#include "area.h" +#include "parameter.h" + +#include +#include +#include + +Driver::Driver(double c_gate_load_, + double c_wire_load_, + double r_wire_load_, + bool is_dram, + bool power_gating_, + int nodes_DSTN_) + : number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), + c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), + // 
power(), + is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0), + sleeptx(NULL), nodes_DSTN(nodes_DSTN_), power_gating(power_gating_) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_n[i] = 0; + width_p[i] = 0; + } + + compute_widths(); + compute_area(); +} + +void Driver::compute_widths() { + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double c_load = c_gate_load + c_wire_load; + width_n[0] = g_tp.min_w_nmos_; + width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + + double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); + number_gates = logical_effort(min_number_gates, + 1, + F, + width_n, + width_p, + c_load, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); +} + +void Driver::compute_area() { + double cumulative_area = 0; + + area.h = g_tp.cell_h_def; + for (int i = 0; i < number_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); + } + area.w = (cumulative_area / area.h); + if (power_gating) { + compute_power_gating(); + cumulative_area += sleeptx->area.get_area(); + area.w = (cumulative_area / area.h); + } +} + +void Driver::compute_power_gating() { + // For all driver chains there is only one sleep transistors to save area + // Total transistor width for sleep tx calculation + for (int i = 0; i < number_gates; i++) { + total_driver_nwidth += width_n[i]; + total_driver_pwidth += width_p[i]; + } + + // compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h); // Psleep tx + detalV = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min; + // if (g_ip->power_gating) + sleeptx = new Sleep_tx(g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + nodes_DSTN, // default is 1 for drivers + area); +} + +double Driver::compute_delay(double inrisetime) { + int i; + double rd, c_load, c_intrinsic, tf; + 
double this_delay = 0; + + for (i = 0; i < number_gates - 1; ++i) { + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_load = gate_C(width_n[i + 1] + width_p[i + 1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += + (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.power_gated_leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vcc_min; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + } + + i = number_gates - 1; + c_load = c_gate_load + c_wire_load; + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + + r_wire_load * (c_wire_load / 2 + c_gate_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + power.readOp.dynamic += + (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.power_gated_leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vcc_min; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + + return this_delay / (1.0 - 0.5); +} diff --git a/src/cacti/decoder/driver.h b/src/cacti/decoder/driver.h new file mode 100644 index 0000000..1bb4a72 --- /dev/null +++ 
b/src/cacti/decoder/driver.h @@ -0,0 +1,107 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __DRIVER_H__ +#define __DRIVER_H__ + +#include "area.h" +#include "component.h" +#include "parameter.h" +#include "powergating.h" + +#include +#include +#include +#include +#include + +class Driver : public Component { +public: + Driver(double c_gate_load_, + double c_wire_load_, + double r_wire_load_, + bool is_dram, + bool power_gating_ = false, + int nodes_DSTN_ = 1); + + int number_gates; + int min_number_gates; + double width_n[MAX_NUMBER_GATES_STAGE]; + double width_p[MAX_NUMBER_GATES_STAGE]; + double c_gate_load; + double c_wire_load; + double r_wire_load; + double delay; + // powerDef power; + bool is_dram_; + + double total_driver_nwidth; + double total_driver_pwidth; + Sleep_tx *sleeptx; + + int nodes_DSTN; + bool power_gating; + + void compute_widths(); + void compute_area(); + double compute_delay(double inrisetime); + + void compute_power_gating(); + + ~Driver() { + if (sleeptx != 0) + delete sleeptx; + }; + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &number_gates; + ar &min_number_gates; + ar &width_n; + ar &width_p; + ar &c_gate_load; + ar &c_wire_load; + ar &r_wire_load; + ar &delay; + ar &is_dram_; + ar &total_driver_nwidth; + ar &total_driver_pwidth; + ar &nodes_DSTN; + ar &power_gating; + Component::serialize(ar, 
version); + } +}; + +#endif // __DRIVER_H__ diff --git a/src/cacti/decoder/predec.cc b/src/cacti/decoder/predec.cc new file mode 100644 index 0000000..6d3eab6 --- /dev/null +++ b/src/cacti/decoder/predec.cc @@ -0,0 +1,228 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "predec.h" + +#include "area.h" +#include "parameter.h" + +#include +#include +#include + +void Predec::set_params(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) { + blk1 = drv1_->blk; + blk2 = drv2_->blk; + drv1 = drv1_; + drv2 = drv2_; + + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + + driver_power.readOp.power_gated_leakage = + drv1->power_nand2_path.readOp.power_gated_leakage + + drv1->power_nand3_path.readOp.power_gated_leakage + + drv2->power_nand2_path.readOp.power_gated_leakage + + drv2->power_nand3_path.readOp.power_gated_leakage; + block_power.readOp.power_gated_leakage = + blk1->power_nand2_path.readOp.power_gated_leakage + + blk1->power_nand3_path.readOp.power_gated_leakage + + blk1->power_L2.readOp.power_gated_leakage + + blk2->power_nand2_path.readOp.power_gated_leakage + + blk2->power_nand3_path.readOp.power_gated_leakage + + blk2->power_L2.readOp.power_gated_leakage; + + power.readOp.leakage = + driver_power.readOp.leakage 
+ block_power.readOp.leakage; + + power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + + block_power.readOp.power_gated_leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; +} + +Predec::Predec(PredecBlkDrv *drv1_, PredecBlkDrv *drv2_) + : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + + driver_power.readOp.power_gated_leakage = + drv1->power_nand2_path.readOp.power_gated_leakage + + drv1->power_nand3_path.readOp.power_gated_leakage + + drv2->power_nand2_path.readOp.power_gated_leakage + + drv2->power_nand3_path.readOp.power_gated_leakage; + block_power.readOp.power_gated_leakage = + blk1->power_nand2_path.readOp.power_gated_leakage + + blk1->power_nand3_path.readOp.power_gated_leakage + + blk1->power_L2.readOp.power_gated_leakage + + blk2->power_nand2_path.readOp.power_gated_leakage + + blk2->power_nand3_path.readOp.power_gated_leakage + + blk2->power_L2.readOp.power_gated_leakage; + + 
power.readOp.leakage = + driver_power.readOp.leakage + block_power.readOp.leakage; + + power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + + block_power.readOp.power_gated_leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; +} + +double Predec::compute_delays(double inrisetime) { + // TODO: Jung Ho thinks that predecoder block driver locates between decoder + // and predecoder block. + pair tmp_pair1, tmp_pair2; + tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); + tmp_pair1 = blk1->compute_delays(tmp_pair1); + tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); + tmp_pair2 = blk2->compute_delays(tmp_pair2); + tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); + + driver_power.readOp.dynamic = + drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; + + block_power.readOp.dynamic = + blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk1->power_L2.readOp.dynamic + + blk2->power_nand2_path.readOp.dynamic * blk2->num_L1_active_nand2_path + + blk2->power_nand3_path.readOp.dynamic *
blk2->num_L1_active_nand3_path + + blk2->power_L2.readOp.dynamic; + + power.readOp.dynamic = + driver_power.readOp.dynamic + block_power.readOp.dynamic; + + delay = tmp_pair1.first; + return tmp_pair1.second; +} + +void Predec::leakage_feedback(double temperature) { + drv1->leakage_feedback(temperature); + drv2->leakage_feedback(temperature); + blk1->leakage_feedback(temperature); + blk2->leakage_feedback(temperature); + + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = + blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; + power.readOp.leakage = + driver_power.readOp.leakage + block_power.readOp.leakage; + + driver_power.readOp.gate_leakage = + drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = + driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; +} + +// returns +pair +Predec::get_max_delay_before_decoder(pair input_pair1, + pair input_pair2) { + pair ret_val; + double delay; + + delay = drv1->delay_nand2_path + blk1->delay_nand2_path; + ret_val.first = delay; + ret_val.second = input_pair1.first; + delay = drv1->delay_nand3_path + blk1->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second =
input_pair1.second; + } + delay = drv2->delay_nand2_path + blk2->delay_nand2_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.first; + } + delay = drv2->delay_nand3_path + blk2->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.second; + } + + return ret_val; +} diff --git a/src/cacti/decoder/predec.h b/src/cacti/decoder/predec.h new file mode 100644 index 0000000..e4b8eed --- /dev/null +++ b/src/cacti/decoder/predec.h @@ -0,0 +1,81 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef __PREDEC_H__
+#define __PREDEC_H__
+
+#include "area.h"
+#include "component.h"
+#include "parameter.h"
+#include "powergating.h"
+#include "predec_blk.h"
+#include "predec_blk_drv.h"
+
+#include
+#include
+#include
+#include
+#include
+
+// Predecoder made of two predecoder blocks and the two drivers feeding them.
+// It does not create the blocks or drivers; it only stores the pointers it is
+// given and aggregates their delay and power figures.
+class Predec : public Component {
+public:
+  // Leaves the object unattached (all pointers null) until set_params() is
+  // called; the other member functions dereference the pointers.
+  Predec() : blk1(nullptr), blk2(nullptr), drv1(nullptr), drv2(nullptr) {}
+  void set_params(PredecBlkDrv *drv1, PredecBlkDrv *drv2);
+  Predec(PredecBlkDrv *drv1, PredecBlkDrv *drv2);
+
+  double compute_delays(double inrisetime); // return outrisetime
+
+  void leakage_feedback(double temperature);
+  PredecBlk *blk1;
+  PredecBlk *blk2;
+  PredecBlkDrv *drv1;
+  PredecBlkDrv *drv2;
+
+  // Power summed over both blocks / both drivers respectively.
+  powerDef block_power;
+  powerDef driver_power;
+
+private:
+  // returns (delay, rise time) of the slowest driver+block path
+  pair<double, double>
+  get_max_delay_before_decoder(pair<double, double> input_pair1,
+                               pair<double, double> input_pair2);
+  // Serialization
+  friend class boost::serialization::access;
+
+  // Archives the two power summaries and the Component base. The blk/drv
+  // pointers are not archived.
+  // NOTE(review): presumably they are re-attached with set_params() after
+  // loading - confirm against the callers.
+  template <class Archive>
+  void serialize(Archive &ar, const unsigned int version) {
+    ar &block_power;
+    ar &driver_power;
+    Component::serialize(ar, version);
+  }
+};
+
+#endif // __PREDEC_H__
diff --git a/src/cacti/decoder/predec_blk.cc b/src/cacti/decoder/predec_blk.cc
new file mode 100644
index 0000000..eee9767
--- /dev/null
+++ b/src/cacti/decoder/predec_blk.cc
@@ -0,0 +1,872 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard
Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include "predec_blk.h"
+
+#include "area.h"
+#include "decoder.h"
+#include "parameter.h"
+
+#include
+#include
+#include
+
+// Constructs a fully configured predecoder block. All of the work is done by
+// set_params(); see there for the meaning of the arguments.
+PredecBlk::PredecBlk(int num_dec_signals,
+                     Decoder *dec_,
+                     double C_wire_predec_blk_out,
+                     double R_wire_predec_blk_out_,
+                     int num_dec_per_predec,
+                     bool is_dram,
+                     bool is_blk1) {
+  set_params(num_dec_signals,
+             dec_,
+             C_wire_predec_blk_out,
+             R_wire_predec_blk_out_,
+             num_dec_per_predec,
+             is_dram,
+             is_blk1);
+}
+
+// (Re)configures the block and then sizes it (compute_widths) and computes its
+// area and leakage (compute_area).
+//
+//   num_dec_signals        number of decoder outputs; its log2 is the number
+//                          of address bits to be decoded
+//   dec_                   decoder driven by this block
+//   C_wire_predec_blk_out  wire capacitance on the block output
+//   R_wire_predec_blk_out_ wire resistance on the block output
+//   num_dec_per_predec     multiplier applied to the decoder gate load
+//   is_dram                forwarded to the device/gate helpers
+//   is_blk1                true for the first block, false for the second
+//
+// Address bits are split so that block 1 gets ceil(n/2) and block 2 the rest.
+// With fewer than 4 bits only block 1 exists and it drives the decoder output
+// load directly; with 0 bits no block exists and sizing is skipped entirely.
+void PredecBlk::set_params(int num_dec_signals,
+                           Decoder *dec_,
+                           double C_wire_predec_blk_out,
+                           double R_wire_predec_blk_out_,
+                           int num_dec_per_predec,
+                           bool is_dram,
+                           bool is_blk1) {
+  // Start from a clean "block does not exist" state.
+  dec = dec_;
+  exist = false;
+  number_input_addr_bits = 0;
+  C_ld_predec_blk_out = 0;
+  R_wire_predec_blk_out = 0;
+  branch_effort_nand2_gate_output = 1;
+  branch_effort_nand3_gate_output = 1;
+  flag_two_unique_paths = false;
+  flag_L2_gate = 0;
+  number_inputs_L1_gate = 0;
+  number_gates_L1_nand2_path = 0;
+  number_gates_L1_nand3_path = 0;
+  number_gates_L2 = 0;
+  min_number_gates_L1 = 2;
+  min_number_gates_L2 = 2;
+  num_L1_active_nand2_path = 0;
+  num_L1_active_nand3_path = 0;
+  delay_nand2_path = 0;
+  delay_nand3_path = 0;
+  is_dram_ = is_dram;
+
+  int branch_effort_predec_out;
+  double C_ld_dec_gate;
+  int num_addr_bits_dec = _log2(num_dec_signals);
+  int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
+  int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
+
+  w_L1_nand2_n[0] = 0;
+  w_L1_nand2_p[0] = 0;
+  w_L1_nand3_n[0] = 0;
+  w_L1_nand3_p[0] = 0;
+
+  if (is_blk1 == true) {
+    if (num_addr_bits_dec <= 0) {
+      return;
+    } else if (num_addr_bits_dec < 4) {
+      // Just one predecoder block is required with NAND2 gates. No decoder
+      // required. The first level of predecoding directly drives the decoder
+      // output load
+      exist = true;
+      number_input_addr_bits = num_addr_bits_dec;
+      R_wire_predec_blk_out = dec->R_wire_dec_out;
+      C_ld_predec_blk_out = dec->C_ld_dec_out;
+    } else {
+      // Each output of this block fans out to one decoder gate per
+      // combination of the other block's address bits.
+      exist = true;
+      number_input_addr_bits = blk1_num_input_addr_bits;
+      branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
+      C_ld_dec_gate =
+          num_dec_per_predec *
+          gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+      R_wire_predec_blk_out = R_wire_predec_blk_out_;
+      C_ld_predec_blk_out =
+          branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+    }
+  } else {
+    // The second block is only needed once there are at least 4 address bits.
+    if (num_addr_bits_dec >= 4) {
+      exist = true;
+      number_input_addr_bits = blk2_num_input_addr_bits;
+      branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
+      C_ld_dec_gate =
+          num_dec_per_predec *
+          gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
+      R_wire_predec_blk_out = R_wire_predec_blk_out_;
+      C_ld_predec_blk_out =
+          branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
+    }
+  }
+
+  compute_widths();
+  compute_area();
+}
+
+void PredecBlk::compute_widths() {
+  double F, c_load_nand3_path, c_load_nand2_path;
+  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
+  double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+  double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
+
+  if (exist == false)
+    return;
+
+  switch (number_input_addr_bits) {
+  case 1:
+    flag_two_unique_paths = false;
+    number_inputs_L1_gate = 2;
+    flag_L2_gate = 0;
+    break;
+  case 2:
+ flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; + break; + case 3: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 0; + break; + case 4: + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 4; + break; + case 5: + flag_two_unique_paths = true; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 8; + branch_effort_nand3_gate_output = 4; + break; + case 6: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 2; + branch_effort_nand3_gate_output = 8; + break; + case 7: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 32; + branch_effort_nand3_gate_output = 16; + break; + case 8: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 64; + branch_effort_nand3_gate_output = 32; + break; + case 9: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 3; + branch_effort_nand3_gate_output = 64; + break; + default: + assert(0); + break; + } + + // find the number of gates and sizing in second level of predecoder (if there + // is a second level) + if (flag_L2_gate) { + if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate + w_L2_n[0] = 2 * g_tp.min_w_nmos_; + F = gnand2; + } else { // 2nd level is a NAND3 gate + w_L2_n[0] = 3 * g_tp.min_w_nmos_; + F = gnand3; + } + w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F *= C_ld_predec_blk_out / + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + number_gates_L2 = logical_effort(min_number_gates_L2, + flag_L2_gate == 2 ? 
gnand2 : gnand3, + F, + w_L2_n, + w_L2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); + + // Now find the number of gates and widths in first level of predecoder + if ((flag_two_unique_paths) || + (number_inputs_L1_gate == + 2)) { // Whenever flag_two_unique_paths is true, it means first level + // of decoder employs + // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it + // means a NAND2 gate is used in the first level of the predecoder + c_load_nand2_path = + branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * c_load_nand2_path / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort(min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + c_load_nand2_path, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); + } + + // Now find widths of gates along path in which first gate is a NAND3 + if ((flag_two_unique_paths) || + (number_inputs_L1_gate == + 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level + // of decoder employs + // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 3, it
+           // means a NAND3 gate is used in the first level of the predecoder
+      c_load_nand3_path =
+          branch_effort_nand3_gate_output *
+          (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
+      w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+      w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+      F = gnand3 * c_load_nand3_path /
+          (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+           gate_C(w_L1_nand3_p[0], 0, is_dram_));
+      number_gates_L1_nand3_path = logical_effort(min_number_gates_L1,
+                                                  gnand3,
+                                                  F,
+                                                  w_L1_nand3_n,
+                                                  w_L1_nand3_p,
+                                                  c_load_nand3_path,
+                                                  p_to_n_sz_ratio,
+                                                  is_dram_,
+                                                  false,
+                                                  g_tp.max_w_nmos_);
+    }
+  } else { // find number of gates and widths in first level of predecoder block
+    // when there is no second level
+    if (number_inputs_L1_gate == 2) {
+      w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
+      w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+      F = gnand2 * C_ld_predec_blk_out /
+          (gate_C(w_L1_nand2_n[0], 0, is_dram_) +
+           gate_C(w_L1_nand2_p[0], 0, is_dram_));
+      number_gates_L1_nand2_path = logical_effort(min_number_gates_L1,
+                                                  gnand2,
+                                                  F,
+                                                  w_L1_nand2_n,
+                                                  w_L1_nand2_p,
+                                                  C_ld_predec_blk_out,
+                                                  p_to_n_sz_ratio,
+                                                  is_dram_,
+                                                  false,
+                                                  g_tp.max_w_nmos_);
+    } else if (number_inputs_L1_gate == 3) {
+      w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
+      w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
+      F = gnand3 * C_ld_predec_blk_out /
+          (gate_C(w_L1_nand3_n[0], 0, is_dram_) +
+           gate_C(w_L1_nand3_p[0], 0, is_dram_));
+      number_gates_L1_nand3_path = logical_effort(min_number_gates_L1,
+                                                  gnand3,
+                                                  F,
+                                                  w_L1_nand3_n,
+                                                  w_L1_nand3_p,
+                                                  C_ld_predec_blk_out,
+                                                  p_to_n_sz_ratio,
+                                                  is_dram_,
+                                                  false,
+                                                  g_tp.max_w_nmos_);
+    }
+  }
+}
+
+// Computes the block's area and its sub-threshold, power-gated and gate
+// leakage from the widths chosen by compute_widths(). Also sets the number of
+// first-level gates that are active per access (num_L1_active_*_path).
+// Does nothing when the block does not exist.
+void PredecBlk::compute_area() {
+  if (exist) { // First check whether a predecoder block is needed
+    int num_L1_nand2 = 0;
+    int num_L1_nand3 = 0;
+    int num_L2 = 0;
+    double tot_area_L1_nand3 = 0;
+    double leak_L1_nand3 = 0;
+    double gate_leak_L1_nand3 = 0;
+
+    // The NAND2 figures are always evaluated; when there is no NAND2 path
+    // they are multiplied by num_L1_nand2 == 0 below.
+    double tot_area_L1_nand2 = compute_gate_area(
+        NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
+    double leak_L1_nand2 =
+        cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+    double gate_leak_L1_nand2 =
+        cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+
+    // A first-level NAND3 gate is present under the same condition that
+    // compute_widths() and compute_delays() use for the NAND3 path, which
+    // includes the mixed NAND2/NAND3 configurations.
+    const bool has_nand3_path =
+        flag_two_unique_paths || (number_inputs_L1_gate == 3);
+    if (has_nand3_path) {
+      tot_area_L1_nand3 = compute_gate_area(
+          NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
+      leak_L1_nand3 = cmos_Isub_leakage(
+          w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_);
+      gate_leak_L1_nand3 =
+          cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_);
+    }
+
+    switch (number_input_addr_bits) {
+    case 1: // 2 NAND2 gates
+      num_L1_nand2 = 2;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 2: // 4 NAND2 gates
+      num_L1_nand2 = 4;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 3: // 8 NAND3 gates
+      num_L1_nand3 = 8;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 4: // 4 + 4 NAND2 gates
+      num_L1_nand2 = 8;
+      num_L2 = 16;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 5: // 4 NAND2 gates, 8 NAND3 gates
+      num_L1_nand2 = 4;
+      num_L1_nand3 = 8;
+      num_L2 = 32;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 6: // 8 + 8 NAND3 gates
+      num_L1_nand3 = 16;
+      num_L2 = 64;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 2;
+      break;
+    case 7: // 4 + 4 NAND2 gates, 8 NAND3 gates
+      num_L1_nand2 = 8;
+      num_L1_nand3 = 8;
+      num_L2 = 128;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 8: // 4 NAND2 gates, 8 + 8 NAND3 gates
+      // NOTE(review): with a single group of 4 NAND2 gates one would expect
+      // num_L1_active_nand2_path == 1 (as in case 5) - confirm.
+      num_L1_nand2 = 4;
+      num_L1_nand3 = 16;
+      num_L2 = 256;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 2;
+      break;
+    case 9: // 8 + 8 + 8 NAND3 gates
+      num_L1_nand3 = 24;
+      num_L2 = 512;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 3;
+      break;
+    default:
+      break;
+    }
+
+    // Add the buffer stages that follow each first-level gate, then scale by
+    // the number of such chains.
+    for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
+      tot_area_L1_nand2 += compute_gate_area(
+          INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
+      leak_L1_nand2 += cmos_Isub_leakage(
+          w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+      gate_leak_L1_nand2 +=
+          cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+    }
+    tot_area_L1_nand2 *= num_L1_nand2;
+    leak_L1_nand2 *= num_L1_nand2;
+    gate_leak_L1_nand2 *= num_L1_nand2;
+
+    for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
+      tot_area_L1_nand3 += compute_gate_area(
+          INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
+      leak_L1_nand3 += cmos_Isub_leakage(
+          w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+      gate_leak_L1_nand3 +=
+          cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+    }
+    tot_area_L1_nand3 *= num_L1_nand3;
+    leak_L1_nand3 *= num_L1_nand3;
+    gate_leak_L1_nand3 *= num_L1_nand3;
+
+    double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
+    double cumulative_area_L2 = 0.0;
+    double leakage_L2 = 0.0;
+    double gate_leakage_L2 = 0.0;
+
+    // Second level: one NAND2 or NAND3 gate followed by its buffer chain.
+    if (flag_L2_gate == 2) {
+      cumulative_area_L2 =
+          compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+      leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+      gate_leakage_L2 =
+          cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+    } else if (flag_L2_gate == 3) {
+      cumulative_area_L2 =
+          compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
+      leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+      gate_leakage_L2 =
+          cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+    }
+
+    for (int i = 1; i < number_gates_L2; ++i) {
+      cumulative_area_L2 +=
+          compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
+      leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+      gate_leakage_L2 +=
+          cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+    }
+    cumulative_area_L2 *= num_L2;
+    leakage_L2 *= num_L2;
+    gate_leakage_L2 *= num_L2;
+
+    power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+    power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+    power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+
+    power_nand2_path.readOp.power_gated_leakage =
+        leak_L1_nand2 * g_tp.peri_global.Vcc_min;
+    power_nand3_path.readOp.power_gated_leakage =
+        leak_L1_nand3 * g_tp.peri_global.Vcc_min;
+    power_L2.readOp.power_gated_leakage = leakage_L2 * g_tp.peri_global.Vcc_min;
+
+    area.set_area(cumulative_area_L1 + cumulative_area_L2);
+    power_nand2_path.readOp.gate_leakage =
+        gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+    power_nand3_path.readOp.gate_leakage =
+        gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+    power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
+  }
+}
+
+pair<double, double>
+PredecBlk::compute_delays(
+    pair<double, double> inrisetime) // (nand2 path, nand3 path)
+{
+  pair<double, double> ret_val;
+  ret_val.first = 0;  // outrisetime_nand2_path
+  ret_val.second = 0; // outrisetime_nand3_path
+
+  double inrisetime_nand2_path = inrisetime.first;
+  double inrisetime_nand3_path = inrisetime.second;
+  int i;
+  double rd, c_load, c_intrinsic, tf, this_delay;
+  double Vdd = g_tp.peri_global.Vdd;
+
+  // TODO: following delay calculation part can be greatly simplified.
+ // first check whether a predecoder block is required + if (exist) { + // Find delay in first level of predecoder block + // First find delay in path + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { + // First gate is a NAND2 gate + rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); + c_intrinsic = + 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + // Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) { + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + c_load = + gate_C(w_L1_nand2_n[i + 1] + w_L1_nand2_p[i + 1], 0.0, is_dram_); + c_intrinsic = + drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + // Add delay of the last inverter + i = number_gates_L1_nand2_path - 1; + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = + branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = + drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += 
this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { // First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = + drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + if ((flag_two_unique_paths) || + (number_inputs_L1_gate == 3)) { // Check if the number of gates in the + // first level is more than 1. + // First gate is a NAND3 gate + rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); + c_intrinsic = + 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + + // Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) { + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + c_load = + gate_C(w_L1_nand3_n[i + 1] + w_L1_nand3_p[i + 1], 0.0, is_dram_); + c_intrinsic = + drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + 
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + // Add delay of the last inverter + i = number_gates_L1_nand3_path - 1; + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = + branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = + drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { // First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = + drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + // Find delay through second level + if (flag_L2_gate) { + if (flag_L2_gate == 2) { + rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = + 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { // flag_L2_gate = 3 + rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L2_n[1] + 
w_L2_p[1], 0.0, is_dram_); + c_intrinsic = + 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + for (i = 1; i < number_gates_L2 - 1; ++i) { + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L2_n[i + 1] + w_L2_p[i + 1], 0.0, is_dram_); + c_intrinsic = + drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + // Add delay of final inverter that drives the wordline decoders + i = number_gates_L2 - 1; + c_load = C_ld_predec_blk_out; + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + delay = (ret_val.first > ret_val.second) ? 
ret_val.first : ret_val.second;
+  return ret_val;
+}
+
+// Recomputes the block's sub-threshold, power-gated and gate leakage from the
+// already chosen gate widths, mirroring the leakage part of compute_area().
+// `temperature` is not read in this function.
+// NOTE(review): presumably the leakage helpers pick the temperature up from
+// global state updated by the caller - confirm.
+// Does nothing when the block does not exist.
+void PredecBlk::leakage_feedback(double temperature) {
+  if (exist) { // First check whether a predecoder block is needed
+    int num_L1_nand2 = 0;
+    int num_L1_nand3 = 0;
+    int num_L2 = 0;
+    double leak_L1_nand3 = 0;
+    double gate_leak_L1_nand3 = 0;
+
+    // The NAND2 figures are always evaluated; when there is no NAND2 path
+    // they are multiplied by num_L1_nand2 == 0 below.
+    double leak_L1_nand2 =
+        cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+    double gate_leak_L1_nand2 =
+        cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
+
+    // A first-level NAND3 gate is present under the same condition that
+    // compute_delays() uses for the NAND3 path, which includes the mixed
+    // NAND2/NAND3 configurations.
+    const bool has_nand3_path =
+        flag_two_unique_paths || (number_inputs_L1_gate == 3);
+    if (has_nand3_path) {
+      leak_L1_nand3 = cmos_Isub_leakage(
+          w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_);
+      gate_leak_L1_nand3 =
+          cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_);
+    }
+
+    switch (number_input_addr_bits) {
+    case 1: // 2 NAND2 gates
+      num_L1_nand2 = 2;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 2: // 4 NAND2 gates
+      num_L1_nand2 = 4;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 3: // 8 NAND3 gates
+      num_L1_nand3 = 8;
+      num_L2 = 0;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 4: // 4 + 4 NAND2 gates
+      num_L1_nand2 = 8;
+      num_L2 = 16;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 0;
+      break;
+    case 5: // 4 NAND2 gates, 8 NAND3 gates
+      num_L1_nand2 = 4;
+      num_L1_nand3 = 8;
+      num_L2 = 32;
+      num_L1_active_nand2_path = 1;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 6: // 8 + 8 NAND3 gates
+      num_L1_nand3 = 16;
+      num_L2 = 64;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 2;
+      break;
+    case 7: // 4 + 4 NAND2 gates, 8 NAND3 gates
+      num_L1_nand2 = 8;
+      num_L1_nand3 = 8;
+      num_L2 = 128;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 1;
+      break;
+    case 8: // 4 NAND2 gates, 8 + 8 NAND3 gates
+      num_L1_nand2 = 4;
+      num_L1_nand3 = 16;
+      num_L2 = 256;
+      num_L1_active_nand2_path = 2;
+      num_L1_active_nand3_path = 2;
+      break;
+    case 9: // 8 + 8 + 8 NAND3 gates
+      num_L1_nand3 = 24;
+      num_L2 = 512;
+      num_L1_active_nand2_path = 0;
+      num_L1_active_nand3_path = 3;
+      break;
+    default:
+      break;
+    }
+
+    // Add the buffer stages that follow each first-level gate, then scale by
+    // the number of such chains.
+    for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
+      leak_L1_nand2 += cmos_Isub_leakage(
+          w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+      gate_leak_L1_nand2 +=
+          cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
+    }
+    leak_L1_nand2 *= num_L1_nand2;
+    gate_leak_L1_nand2 *= num_L1_nand2;
+
+    for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
+      leak_L1_nand3 += cmos_Isub_leakage(
+          w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+      gate_leak_L1_nand3 +=
+          cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
+    }
+    leak_L1_nand3 *= num_L1_nand3;
+    gate_leak_L1_nand3 *= num_L1_nand3;
+
+    double leakage_L2 = 0.0;
+    double gate_leakage_L2 = 0.0;
+
+    // Second level: one NAND2 or NAND3 gate followed by its buffer chain.
+    if (flag_L2_gate == 2) {
+      leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+      gate_leakage_L2 =
+          cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
+    } else if (flag_L2_gate == 3) {
+      leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+      gate_leakage_L2 =
+          cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
+    }
+
+    for (int i = 1; i < number_gates_L2; ++i) {
+      leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+      gate_leakage_L2 +=
+          cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
+    }
+    leakage_L2 *= num_L2;
+    gate_leakage_L2 *= num_L2;
+
+    power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
+    power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
+    power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
+
+    // Derived from the same leakage currents, so it has to be refreshed
+    // together with readOp.leakage (same formula as compute_area()).
+    power_nand2_path.readOp.power_gated_leakage =
+        leak_L1_nand2 * g_tp.peri_global.Vcc_min;
+    power_nand3_path.readOp.power_gated_leakage =
+        leak_L1_nand3 * g_tp.peri_global.Vcc_min;
+    power_L2.readOp.power_gated_leakage = leakage_L2 * g_tp.peri_global.Vcc_min;
+
+    power_nand2_path.readOp.gate_leakage =
+        gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
+    power_nand3_path.readOp.gate_leakage =
+        gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
+    power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
+  }
+}
diff --git a/src/cacti/decoder/predec_blk.h b/src/cacti/decoder/predec_blk.h new file mode 100644 index 0000000..d515aad --- /dev/null +++ b/src/cacti/decoder/predec_blk.h @@ -0,0 +1,115 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __PREDEC_BLK_H__ +#define __PREDEC_BLK_H__ + +#include "area.h" +#include "component.h" +#include "decoder.h" +#include "parameter.h" +#include "powergating.h" + +#include +#include +#include +#include +#include + +class PredecBlk : public Component { +public: + PredecBlk(){}; + PredecBlk(int num_dec_signals, + Decoder *dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); + void set_params(int num_dec_signals, + Decoder *dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); + + Decoder *dec; + bool exist; + int number_input_addr_bits; + double C_ld_predec_blk_out; + double R_wire_predec_blk_out; + int branch_effort_nand2_gate_output; + int branch_effort_nand3_gate_output; + bool flag_two_unique_paths; + int flag_L2_gate; + int number_inputs_L1_gate; + int number_gates_L1_nand2_path; + int number_gates_L1_nand3_path; + int number_gates_L2; + int min_number_gates_L1; + int min_number_gates_L2; + int num_L1_active_nand2_path; + int num_L1_active_nand3_path; + double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; + double w_L2_n[MAX_NUMBER_GATES_STAGE]; + double 
w_L2_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + powerDef power_L2; + + bool is_dram_; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + pair + compute_delays(pair inrisetime); // + // return + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + Component::serialize(ar, version); + } +}; + +#endif //__PREDEC_BLK_H__ diff --git a/src/cacti/decoder/predec_blk_drv.cc b/src/cacti/decoder/predec_blk_drv.cc new file mode 100644 index 0000000..5b51f6f --- /dev/null +++ b/src/cacti/decoder/predec_blk_drv.cc @@ -0,0 +1,432 @@ +/***************************************************************************** + * McPAT/CACTI + * SOFTWARE LICENSE AGREEMENT + * Copyright 2012 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "predec_blk_drv.h" + +#include "area.h" +#include "decoder.h" +#include "parameter.h" +#include "predec_blk.h" + +#include +#include +#include + +void PredecBlkDrv::set_params(int way_select_, PredecBlk *blk_, bool is_dram) { + flag_driver_exists = 0; + number_gates_nand2_path = 0; + number_gates_nand3_path = 0; + min_number_gates = 2; + num_buffers_driving_1_nand2_load = 0; + num_buffers_driving_2_nand2_load = 0; + num_buffers_driving_4_nand2_load = 0; + num_buffers_driving_2_nand3_load = 0; + num_buffers_driving_8_nand3_load = 0; + num_buffers_nand3_path = 0; + c_load_nand2_path_out = 0; + c_load_nand3_path_out = 0; + r_load_nand2_path_out = 0; + r_load_nand3_path_out = 0; + delay_nand2_path = 0; + delay_nand3_path = 0; + blk = blk_; + dec = blk->dec; + is_dram_ = is_dram; + way_select = way_select_; + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; + } + + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) { + c_load_nand2_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } else if (dec->num_in_signals == 3) { + 
c_load_nand3_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } else if (way_select == 0) { + if (blk->exist) { + flag_driver_exists = 1; + } + } + + compute_widths(); + compute_area(); +} + +PredecBlkDrv::PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram) + : flag_driver_exists(0), number_gates_nand2_path(0), + number_gates_nand3_path(0), min_number_gates(2), + num_buffers_driving_1_nand2_load(0), num_buffers_driving_2_nand2_load(0), + num_buffers_driving_4_nand2_load(0), num_buffers_driving_2_nand3_load(0), + num_buffers_driving_8_nand3_load(0), num_buffers_nand3_path(0), + c_load_nand2_path_out(0), c_load_nand3_path_out(0), + r_load_nand2_path_out(0), r_load_nand3_path_out(0), delay_nand2_path(0), + delay_nand3_path(0), power_nand2_path(), power_nand3_path(), blk(blk_), + dec(blk->dec), is_dram_(is_dram), way_select(way_select_) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; + } + + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) { + c_load_nand2_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } else if (dec->num_in_signals == 3) { + c_load_nand3_path_out = + gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } else if (way_select == 0) { + if (blk->exist) { + flag_driver_exists = 1; + } + } + + compute_widths(); + compute_area(); +} + +void PredecBlkDrv::compute_widths() { + // The predecode block driver accepts as input the address bits from the + // h-tree network. For each addr bit it then generates addr and addrbar as + // outputs. 
For now ignore the effect of inversion to generate addrbar and + // simply treat addrbar as addr. + + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + + if (flag_driver_exists) { + double C_nand2_gate_blk = + gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); + double C_nand3_gate_blk = + gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); + + if (way_select == 0) { + if (blk->number_input_addr_bits == 1) { // 2 NAND2 gates + num_buffers_driving_2_nand2_load = 1; + c_load_nand2_path_out = 2 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == + 2) { // 4 NAND2 gates one 2-4 decoder + num_buffers_driving_4_nand2_load = 2; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == + 3) { // 8 NAND3 gates one 3-8 decoder + num_buffers_driving_8_nand3_load = 3; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == + 4) { // 4 + 4 NAND2 gates two 2-4 decoder + num_buffers_driving_4_nand2_load = 4; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == + 5) { // 4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one + // 3-8 decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == + 6) { // 8 + 8 NAND3 gates two 3-8 decoder + num_buffers_driving_8_nand3_load = 6; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == + 7) { // 4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and + // one 3-8 decoder + num_buffers_driving_4_nand2_load = 4; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == + 8) { // 4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and + // two 3-8 decoder + 
num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 6; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == + 9) { // 8 + 8 + 8 NAND3 gates three 3-8 decoder + num_buffers_driving_8_nand3_load = 9; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + } + + if ((blk->flag_two_unique_paths) || (blk->number_inputs_L1_gate == 2) || + (number_input_addr_bits == 0) || + ((way_select) && + (dec->num_in_signals == + 2))) { // this means that way_select is driving NAND2 in decoder. + width_nand2_path_n[0] = g_tp.min_w_nmos_; + width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; + F = c_load_nand2_path_out / + gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); + number_gates_nand2_path = logical_effort(min_number_gates, + 1, + F, + width_nand2_path_n, + width_nand2_path_p, + c_load_nand2_path_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); + } + + if ((blk->flag_two_unique_paths) || (blk->number_inputs_L1_gate == 3) || + ((way_select) && + (dec->num_in_signals == + 3))) { // this means that way_select is driving NAND3 in decoder. 
+ width_nand3_path_n[0] = g_tp.min_w_nmos_; + width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; + F = c_load_nand3_path_out / + gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); + number_gates_nand3_path = logical_effort(min_number_gates, + 1, + F, + width_nand3_path_n, + width_nand3_path_p, + c_load_nand3_path_out, + p_to_n_sz_ratio, + is_dram_, + false, + g_tp.max_w_nmos_); + } + } +} + +void PredecBlkDrv::compute_area() { + double area_nand2_path = 0; + double area_nand3_path = 0; + double leak_nand2_path = 0; + double leak_nand3_path = 0; + double gate_leak_nand2_path = 0; + double gate_leak_nand3_path = 0; + + if (flag_driver_exists) { // first check whether a predecoder block driver is + // needed + for (int i = 0; i < number_gates_nand2_path; ++i) { + area_nand2_path += compute_gate_area(INV, + 1, + width_nand2_path_p[i], + width_nand2_path_n[i], + g_tp.cell_h_def); + leak_nand2_path += cmos_Isub_leakage( + width_nand2_path_n[i], width_nand2_path_p[i], 1, inv, is_dram_); + gate_leak_nand2_path += cmos_Ig_leakage( + width_nand2_path_n[i], width_nand2_path_p[i], 1, inv, is_dram_); + } + area_nand2_path *= + (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + leak_nand2_path *= + (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= + (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + + for (int i = 0; i < number_gates_nand3_path; ++i) { + area_nand3_path += compute_gate_area(INV, + 1, + width_nand3_path_p[i], + width_nand3_path_n[i], + g_tp.cell_h_def); + leak_nand3_path += cmos_Isub_leakage( + width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); + gate_leak_nand3_path += cmos_Ig_leakage( + width_nand3_path_n[i], width_nand3_path_p[i], 1, inv, is_dram_); + } + area_nand3_path *= + (num_buffers_driving_2_nand3_load + 
num_buffers_driving_8_nand3_load); + leak_nand3_path *= + (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= + (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + + power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.power_gated_leakage = + leak_nand2_path * g_tp.peri_global.Vcc_min; + power_nand3_path.readOp.power_gated_leakage = + leak_nand3_path * g_tp.peri_global.Vcc_min; + power_nand2_path.readOp.gate_leakage = + gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = + gate_leak_nand3_path * g_tp.peri_global.Vdd; + area.set_area(area_nand2_path + area_nand3_path); + } +} + +pair +PredecBlkDrv::compute_delays(double inrisetime_nand2_path, + double inrisetime_nand3_path) { + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path + int i; + double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; + + if (flag_driver_exists) { + for (i = 0; i < number_gates_nand2_path - 1; ++i) { + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_gate_load = gate_C( + width_nand2_path_p[i + 1] + width_nand2_path_n[i + 1], 0.0, is_dram_); + c_intrinsic = + drain_C_( + width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += + (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; + } + + // Final inverter drives the predecoder block or the decoder output load + if (number_gates_nand2_path != 0) { + i = number_gates_nand2_path - 1; + rd = 
tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_intrinsic = + drain_C_( + width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + c_load = c_load_nand2_path_out; + tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += + (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; + // cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load + // < +#include +#include +#include +#include + +class PredecBlkDrv : public Component { +public: + void set_params(int way_select_, PredecBlk *blk_, bool is_dram); + PredecBlkDrv(){}; + PredecBlkDrv(int way_select_, PredecBlk *blk_, bool is_dram); + + int flag_driver_exists; + int number_input_addr_bits; + int number_gates_nand2_path; + int number_gates_nand3_path; + int min_number_gates; + int num_buffers_driving_1_nand2_load; + int num_buffers_driving_2_nand2_load; + int num_buffers_driving_4_nand2_load; + int num_buffers_driving_2_nand3_load; + int num_buffers_driving_8_nand3_load; + int num_buffers_nand3_path; + double c_load_nand2_path_out; + double c_load_nand3_path_out; + double r_load_nand2_path_out; + double r_load_nand3_path_out; + double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + + PredecBlk *blk; + Decoder *dec; + bool is_dram_; + int way_select; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + pair compute_delays( + double inrisetime_nand2_path, + double inrisetime_nand3_path); // return + + inline int 
num_addr_bits_nand2_path() { + return num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load; + } + inline int num_addr_bits_nand3_path() { + return num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load; + } + double get_rdOp_dynamic_E(int num_act_mats_hor_dir); + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + Component::serialize(ar, version); + } +}; + +#endif // __PREDEC_BLK_DRV_H__ diff --git a/src/cacti/mat.h b/src/cacti/mat.h index f8d2090..395fc64 100644 --- a/src/cacti/mat.h +++ b/src/cacti/mat.h @@ -34,7 +34,11 @@ #include "component.h" #include "decoder.h" +#include "driver.h" #include "powergating.h" +#include "predec.h" +#include "predec_blk.h" +#include "predec_blk_drv.h" #include "subarray.h" #include "wire.h" diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h index 776f90e..d96ab8d 100644 --- a/src/core/branch_predictor.h +++ b/src/core/branch_predictor.h @@ -38,6 +38,11 @@ #include "interconnect.h" #include "parameter.h" +#include +#include +#include +#include + class BranchPredictor : public Component { public: const ParseXML *XML; @@ -73,6 +78,27 @@ class BranchPredictor : public Component { private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &ithCore; + ar &clockRate; + ar &executionTime; + ar &scktRatio; + ar &chip_PR_overhead; + ar ¯o_PR_overhead; + ar &globalBPT; + ar &localBPT; + ar &L1_localBPT; + ar &L2_localBPT; + ar &chooser; + ar &RAS; + ar ∃ + Component::serialize(ar, version); + } }; #endif // __BRANCH_PREDICTOR__ diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index de63445..79e80af 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -41,6 +41,11 @@ #include "regfile.h" #include "scheduler.h" 
+#include +#include +#include +#include + class EXECU : public Component { public: const ParseXML *XML; @@ -86,6 +91,34 @@ class EXECU : public Component { private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &bypass; + ar &int_bypass; + ar &intTagBypass; + ar &int_mul_bypass; + ar &intTag_mul_Bypass; + ar &fp_bypass; + ar &fpTagBypass; + ar &fp_u; + ar &exeu; + ar &mul; + ar &scheu; + ar &rfu; + ar &lsq_height; + ar ¯o_PR_overhead; + ar &chip_PR_overhead; + ar &scktRatio; + ar &executionTime; + ar &clockRate; + ar ∃ + ar &ithCore; + Component::serialize(ar, version); + } }; #endif // __EXEC_U_H__ diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 989b347..476a209 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -43,13 +43,15 @@ #include void InstFetchU::set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_){ - XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; - coredynp=dyn_p_; - exist=exist_; + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_) { + XML = XML_interface; + ithCore = ithCore_; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + exist = exist_; if (!exist) return; int idx, tag, data, size, line, assoc, banks; @@ -105,7 +107,7 @@ void InstFetchU::set_params(const ParseXML *XML_interface, Core_device, coredynp.opt_local, coredynp.core_ty); - + scktRatio = g_tp.sckt_co_eff; chip_PR_overhead = g_tp.chip_layout_overhead; macro_PR_overhead = g_tp.macro_layout_overhead; @@ -275,10 +277,10 @@ void InstFetchU::set_params(const ParseXML *XML_interface, interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; IB.set_params(&interface_ip, - "InstBuffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + "InstBuffer", + Core_device, + 
coredynp.opt_local, + coredynp.core_ty); // output_data_csv(IB.IB.local_result); // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; @@ -340,13 +342,12 @@ void InstFetchU::set_params(const ParseXML *XML_interface, interface_ip.num_wr_ports = coredynp.predictionW; interface_ip.num_se_rd_ports = 0; BTB.set_params(&interface_ip, - "Branch Target Buffer", - Core_device, - coredynp.opt_local, - coredynp.core_ty); + "Branch Target Buffer", + Core_device, + coredynp.opt_local, + coredynp.core_ty); /// cout<<"area="< 0) { - BPT.set_stats(XML); - } +void InstFetchU::set_stats(const ParseXML *XML_interface) { + if (coredynp.predictionW > 0) { + BPT.set_stats(XML); + } init_stats = true; } -void InstFetchU::computeArea(){ +void InstFetchU::computeArea() { if (!init_params) { std::cerr << "[ InstFetchU ] Error: must set params before calling " "computeArea()\n"; @@ -394,7 +395,7 @@ void InstFetchU::computeArea(){ icache.caches.local_result.area); area.set_area(area.get_area() + icache.caches.local_result.area); - icache.missb.computeArea(); + icache.missb.computeArea(); icache.area.set_area(icache.area.get_area() + icache.missb.local_result.area); area.set_area(area.get_area() + icache.missb.local_result.area); @@ -402,19 +403,19 @@ void InstFetchU::computeArea(){ icache.area.set_area(icache.area.get_area() + icache.ifb.local_result.area); area.set_area(area.get_area() + icache.ifb.local_result.area); - icache.prefetchb.computeArea(); + icache.prefetchb.computeArea(); icache.area.set_area(icache.area.get_area() + icache.prefetchb.local_result.area); area.set_area(area.get_area() + icache.prefetchb.local_result.area); - IB.computeArea(); + IB.computeArea(); IB.area.set_area(IB.area.get_area() + IB.local_result.area); area.set_area(area.get_area() + IB.local_result.area); if (coredynp.predictionW > 0) { - BPT.computeArea(); + BPT.computeArea(); area.set_area(area.get_area() + BPT.area.get_area()); - BTB.computeArea(); + BTB.computeArea(); 
BTB.area.set_area(BTB.area.get_area() + BTB.local_result.area); area.set_area(area.get_area() + BTB.local_result.area); } @@ -422,9 +423,8 @@ void InstFetchU::computeArea(){ ID_misc.computeArea(); ID_operand.computeArea(); ID_inst.computeArea(); - //ID_inst.computeDynamicPower(); + // ID_inst.computeDynamicPower(); - // TODO: X86 decoder should decode the inst in cyclic mode under the control // of squencer. So the dynamic power should be multiplied by a few times. area.set_area(area.get_area() + @@ -441,7 +441,7 @@ void InstFetchU::computeDynamicPower(bool is_tdp) { std::cerr << "[ InstFetchU ] Error: must set params before calling " "computeDynamicPower()\n"; exit(1); - } + } if (is_tdp) { // init stats for Peak icache.caches.stats_t.readAc.access = @@ -588,7 +588,7 @@ void InstFetchU::computeDynamicPower(bool is_tdp) { // (icache.missb.local_result.power + // icache.ifb.local_result.power + // icache.prefetchb.local_result.power)*pppm_Isub; - + ID_misc.computeDynamicPower(); ID_operand.computeDynamicPower(); ID_inst.computeDynamicPower(); diff --git a/src/core/instfetch.h b/src/core/instfetch.h index b34a255..dffda2b 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -61,21 +61,47 @@ class InstFetchU : public Component { inst_decoder ID_misc; bool exist; - InstFetchU(){init_params = false; init_stats = false;}; + InstFetchU() { + init_params = false; + init_stats = false; + }; void set_stats(const ParseXML *XML_interface); void set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exsit = true); + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exsit = true); void computeArea(); void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~InstFetchU(); - private: +private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + 
template + void serialize(Archive &ar, const unsigned int version) { + ar &cache_p; + ar &icache; + ar &IB; + ar &BTB; + ar &BPT; + ar &ID_inst; + ar &ID_operand; + ar &ID_misc; + ar &exist; + ar &macro_PR_overhead; + ar &chip_PR_overhead; + ar &scktRatio; + ar &executionTime; + ar &clockRate; + ar &ithCore; + Component::serialize(ar, version); + } }; -#endif // __INST_FETCH_U_H__ \ No newline at end of file +#endif // __INST_FETCH_U_H__ diff --git a/src/core/loadstore.h b/src/core/loadstore.h index 37fd95b..196371e 100644 --- a/src/core/loadstore.h +++ b/src/core/loadstore.h @@ -39,6 +39,11 @@ #include "interconnect.h" #include "parameter.h" +#include +#include +#include +#include + class LoadStoreU : public Component { public: const ParseXML *XML; @@ -71,6 +76,17 @@ class LoadStoreU : public Component { private: bool init_params; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &dcache; + ar &LSQ; + ar &LoadQ; + Component::serialize(ar, version); + } }; #endif // __LOAD_STORE_U_H__ diff --git a/src/core/mmu.h b/src/core/mmu.h index 51d0026..22b3a78 100644 --- a/src/core/mmu.h +++ b/src/core/mmu.h @@ -38,6 +38,11 @@ #include "interconnect.h" #include "parameter.h" +#include +#include +#include +#include + class MemManU : public Component { public: const ParseXML *XML; @@ -69,6 +74,23 @@ class MemManU : public Component { private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &ithCore; + ar &clockRate; + ar &executionTime; + ar &scktRatio; + ar &chip_PR_overhead; + ar &macro_PR_overhead; + ar &itlb; + ar &dtlb; + ar &exist; + Component::serialize(ar, version); + } }; #endif // __MEMORY_MANAGEMENT_U_H__ diff --git a/src/core/regfile.h b/src/core/regfile.h index ead32fc..b2627d9 100644 --- a/src/core/regfile.h +++ b/src/core/regfile.h @@ -72,6 +72,25 @@ class
RegFU : public Component { private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &IRF; + ar &FRF; + ar &RFWIN; + ar &fp_regfile_height; + ar &int_regfile_height; + ar &macro_PR_overhead; + ar &chip_PR_overhead; + ar &scktRatio; + ar &executionTime; + ar &clockRate; + ar &ithCore; + Component::serialize(ar, version); + } }; #endif // __REGFILE_U_H__ diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index 22d88c3..2ed1d36 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -39,6 +39,11 @@ #include "interconnect.h" #include "parameter.h" +#include +#include +#include +#include + class RENAMINGU : public Component { public: int ithCore; @@ -82,6 +87,26 @@ class RENAMINGU : public Component { unsigned int rename_reads; unsigned int int_instructions; unsigned int fp_instructions; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &ithCore; + ar &clockRate; + ar &executionTime; + ar &iFRAT; + ar &fFRAT; + ar &iRRAT; + ar &fRRAT; + ar &ifreeL; + ar &ffreeL; + ar &idcl; + ar &fdcl; + ar &exist; + Component::serialize(ar, version); + } }; #endif // __RENAMING_U_H__ diff --git a/src/core/scheduler.h b/src/core/scheduler.h index eff647d..7136916 100644 --- a/src/core/scheduler.h +++ b/src/core/scheduler.h @@ -39,6 +39,11 @@ #include "parameter.h" #include "selection_logic.h" +#include +#include +#include +#include + class SchedulerU : public Component { public: const ParseXML *XML; @@ -75,6 +80,28 @@ class SchedulerU : public Component { private: bool init_params; bool init_stats; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &ithCore; + ar &clockRate; + ar &executionTime; + ar &scktRatio; + ar &chip_PR_overhead; + ar &macro_PR_overhead;
+ ar &Iw_height; + ar &fp_Iw_height; + ar &ROB_height; + ar &int_inst_window; + ar &fp_inst_window; + ar &ROB; + ar &instruction_selection; + ar &exist; + Component::serialize(ar, version); + } }; #endif // __SCHEDULER_H__ diff --git a/src/logic/CMakeLists.txt b/src/logic/CMakeLists.txt index 85cdf6a..4737011 100644 --- a/src/logic/CMakeLists.txt +++ b/src/logic/CMakeLists.txt @@ -15,4 +15,4 @@ add_library(logic undiff_core.cc ) target_include_directories(logic PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(logic LINK_PUBLIC cacti top) +target_link_libraries(logic LINK_PUBLIC cacti top decoder) diff --git a/src/logic/dep_resource_conflict_check.h b/src/logic/dep_resource_conflict_check.h index 92e930a..87ec53f 100644 --- a/src/logic/dep_resource_conflict_check.h +++ b/src/logic/dep_resource_conflict_check.h @@ -42,6 +42,10 @@ #include "parameter.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -57,7 +61,13 @@ class dep_resource_conflict_check : public Component { bool _is_default = true); InputParameter l_ip; uca_org_t local_result; - double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; + double WNORn; + double WNORp; + double Wevalinvp; + double Wevalinvn; + double Wcompn; + double Wcompp; + double Wcomppreequ; CoreDynParam coredynp; int compare_bits; bool is_default; @@ -76,6 +86,25 @@ class dep_resource_conflict_check : public Component { ~dep_resource_conflict_check() { local_result.cleanup(); } void leakage_feedback(double temperature); + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &local_result; + ar &WNORn; + ar &WNORp; + ar &Wevalinvp; + ar &Wevalinvn; + ar &Wcompn; + ar &Wcompp; + ar &Wcomppreequ; + ar &compare_bits; + ar &is_default; + Component::serialize(ar, version); + } }; #endif // __DEP_RESOURCE_CONFLICT_CHECK_H__ diff --git a/src/logic/dff_cell.h b/src/logic/dff_cell.h
index 9069c95..03c18d4 100644 --- a/src/logic/dff_cell.h +++ b/src/logic/dff_cell.h @@ -42,6 +42,10 @@ #include "parameter.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -72,6 +76,29 @@ class DFFCell : public Component { double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); void compute_DFF_cell(void); + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &is_dram; + ar &cell_load; + ar &WdecNANDn; + ar &WdecNANDp; + ar &clock_cap; + ar &model; + ar &n_switch; + ar &n_keep_1; + ar &n_keep_0; + ar &n_clock; + ar &e_switch; + ar &e_keep_1; + ar &e_keep_0; + ar &e_clock; + Component::serialize(ar, version); + } }; #endif //__DFF_CELL_H__ diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index daa770b..54a3abf 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -10,8 +10,15 @@ #include "const.h" #include "decoder.h" #include "parameter.h" +#include "predec.h" +#include "predec_blk.h" +#include "predec_blk_drv.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -58,6 +65,29 @@ class inst_decoder : public Component { private: bool init_params; + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &is_default; + ar &opcode_length; + ar &num_decoders; + ar &x86; + ar &num_decoder_segments; + ar &num_decoded_signals; + ar &local_result; + ar &device_ty; + ar &core_ty; + ar &final_dec; + ar &pre_dec; + ar &predec_blk1; + ar &predec_blk2; + ar &predec_blk_drv1; + ar &predec_blk_drv2; + Component::serialize(ar, version); + } }; -#endif //__INST_DECODER_H__ \ No newline at end of file +#endif //__INST_DECODER_H__ diff --git a/src/logic/pipeline.h b/src/logic/pipeline.h index f6dd544..53e0747 100644 --- a/src/logic/pipeline.h +++ b/src/logic/pipeline.h @@ 
-42,6 +42,10 @@ #include "parameter.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -58,7 +62,8 @@ class Pipeline : public Component { uca_org_t local_result; CoreDynParam coredynp; enum Device_ty device_ty; - bool is_core_pipeline, is_default; + bool is_core_pipeline; + bool is_default; double num_piperegs; // int pipeline_stages; // int tot_stage_vector, per_stage_vector; @@ -73,6 +78,24 @@ class Pipeline : public Component { void compute_stage_vector(); void compute(); ~Pipeline() { local_result.cleanup(); }; + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &local_result; + ar &device_ty; + ar &is_core_pipeline; + ar &is_default; + ar &num_piperegs; + ar &process_ind; + ar &WNANDn; + ar &WNANDp; + ar &load_per_pipeline_stage; + Component::serialize(ar, version); + } }; #endif // __PIPELINE_H__ diff --git a/src/logic/selection_logic.h b/src/logic/selection_logic.h index 78fb2e1..4eaaa22 100644 --- a/src/logic/selection_logic.h +++ b/src/logic/selection_logic.h @@ -42,6 +42,10 @@ #include "parameter.h" #include "xmlParser.h" +#include +#include +#include +#include #include #include #include @@ -75,6 +79,22 @@ class selection_logic : public Component { enum Core_type core_ty_ = Inorder); void selection_power(); void leakage_feedback(double temperature); // TODO + +private: + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &is_default; + ar &local_result; + ar &win_entries; + ar &issue_width; + ar &num_threads; + ar &device_ty; + ar &core_ty; + Component::serialize(ar, version); + } }; #endif //__SELECTION_LOGIC_H__ From 16db390016c582e947812aa3e5a451c31d950c30 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Wed, 24 Jun 2020 00:53:20 -0500 Subject: [PATCH 54/59] core done --- src/core/core.cc | 277 
++++++++++++++++++++---------------------- src/core/core.h | 17 ++- src/core/instfetch.cc | 2 +- src/processor.cc | 8 +- 4 files changed, 152 insertions(+), 152 deletions(-) diff --git a/src/core/core.cc b/src/core/core.cc index 3145747..84ec674 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -44,7 +44,7 @@ #include //#include "globalvar.h" -Core::Core(const ParseXML *XML_interface, +void Core::set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_){ /* @@ -52,108 +52,118 @@ Core::Core(const ParseXML *XML_interface, */ XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; - ifu=0; mmu=0; rnu=0; l2cache=0; bool exit_flag = true; - double pipeline_area_per_unit; // interface_ip.wire_is_mat_type = 2; // interface_ip.wire_os_mat_type = 2; // interface_ip.wt =Global_30; set_core_param(); if (XML->sys.Private_L2) { - l2cache = new SharedCache(); - l2cache->set_params(XML, ithCore, &interface_ip); - l2cache->set_stats(XML); - l2cache->computeArea(); + l2cache.set_params(XML, ithCore, &interface_ip); } clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; - ifu = new InstFetchU(); - ifu->set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); - ifu->computeArea(); - ifu->set_stats(XML); + ifu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); + lsu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); - lsu.computeArea(); - mmu = new MemManU(); - mmu->set_params(XML, ithCore, &interface_ip, coredynp); - mmu->computeArea(); - mmu->set_stats(XML); + lsu.computeArea(); //done on purpose because the exu unit is dependent on the lsu.lsq_height which is set in compute area + mmu.set_params(XML, ithCore, &interface_ip, coredynp); + exu.set_params( XML, ithCore, &interface_ip, lsu.lsq_height, coredynp, exit_flag); - exu.computeArea(); - exu.set_stats(XML); - exu.computeStaticPower(); + undiffCore.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); - undiffCore.computeArea(); + 
// undiffCore.computeArea(); // undiffCore.computeDynamicPower(); if (coredynp.core_ty == OOO) { - rnu = new RENAMINGU(); - rnu->set_params(XML, ithCore, &interface_ip, coredynp); - rnu->computeArea(); - rnu->set_stats(XML); + rnu.set_params(XML, ithCore, &interface_ip, coredynp); } corepipe.set_params(&interface_ip, coredynp); - corepipe.computeArea(); + + + // area.set_area(area.get_area()+ corepipe.area.get_area()); + + + // //clock power + // clockNetwork.init_wire_external(is_default, &interface_ip); + // clockNetwork.clk_area =area*1.1;//10% of placement overhead. + // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal + // clockNetwork.start_wiring_level =5;//toplevel metal + // clockNetwork.num_regs = corepipe.tot_stage_vector; + // clockNetwork.optimize_wire(); +} + +void Core::computeArea(){ if (coredynp.core_ty == OOO) { + rnu.computeArea(); + rnu.set_stats(XML); + } + corepipe.computeArea(); + + if (coredynp.core_ty == OOO) { pipeline_area_per_unit = (corepipe.area.get_area() * coredynp.num_pipelines) / 5.0; - if (rnu->exist) { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); + if (rnu.exist) { + rnu.area.set_area(rnu.area.get_area() + pipeline_area_per_unit); } } else { pipeline_area_per_unit = (corepipe.area.get_area() * coredynp.num_pipelines) / 4.0; } - // area.set_area(area.get_area()+ corepipe.area.get_area()); - if (ifu->exist) { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); + if (coredynp.core_ty == OOO) { + if (rnu.exist) { + + area.set_area(area.get_area() + rnu.area.get_area()); + } } + + if (XML->sys.Private_L2) { + l2cache.set_stats(XML); + l2cache.computeArea(); + area.set_area(area.get_area() + l2cache.area.get_area()); + } + ifu.set_stats(XML); + ifu.computeArea(); + if (ifu.exist) { + ifu.area.set_area(ifu.area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu.area.get_area()); + } + if (lsu.exist) { 
lsu.area.set_area(lsu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + lsu.area.get_area()); } + + mmu.computeArea(); + mmu.set_stats(XML); + if (mmu.exist) { + mmu.area.set_area(mmu.area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + mmu.area.get_area()); + } + exu.computeArea(); + exu.set_stats(XML); + exu.computeStaticPower(); if (exu.exist) { exu.area.set_area(exu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + exu.area.get_area()); } - if (mmu->exist) { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + mmu->area.get_area()); - } - - if (coredynp.core_ty == OOO) { - if (rnu->exist) { - - area.set_area(area.get_area() + rnu->area.get_area()); - } - } + undiffCore.computeArea(); if (undiffCore.exist) { area.set_area(area.get_area() + undiffCore.area.get_area()); } - - if (XML->sys.Private_L2) { - area.set_area(area.get_area() + l2cache->area.get_area()); - } - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
- // rule of thumb clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); } -void Core::computeEnergy(bool is_tdp) { + +void Core::computeDynamicPower(bool is_tdp) { /* * When computing TDP, power = energy_per_cycle (the value computed in this * function) * clock_rate (in the display_energy function) When computing @@ -165,14 +175,14 @@ void Core::computeEnergy(bool is_tdp) { double rtp_pipeline_coe; double num_units = 4.0; if (is_tdp) { - ifu->computeDynamicPower(is_tdp); + ifu.computeDynamicPower(is_tdp); lsu.computeDynamicPower(is_tdp); - mmu->computeDynamicPower(is_tdp); + mmu.computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeStaticPower(is_tdp); + rnu.computeStaticPower(is_tdp); set_pppm( pppm_t, coredynp.num_pipelines / num_units, @@ -180,26 +190,26 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); // User need to feed a duty cycle to improve accuracy - if (rnu->exist) { - rnu->power = rnu->power + corepipe.power * pppm_t; - power = power + rnu->power; + if (rnu.exist) { + rnu.power = rnu.power + corepipe.power * pppm_t; + power = power + rnu.power; } } - if (ifu->exist) { + if (ifu.exist) { set_pppm(pppm_t, coredynp.num_pipelines / num_units * coredynp.IFU_duty_cycle, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); // cout << "IFU = " << - // ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe.power * pppm_t; + // ifu.power.readOp.dynamic*clockRate << " W" << endl; + ifu.power = ifu.power + corepipe.power * pppm_t; // cout << "IFU = " << - // ifu->power.readOp.dynamic*clockRate << " W" << endl; + // ifu.power.readOp.dynamic*clockRate << " W" << endl; // cout << "1/4 pipe = " << // 
corepipe.power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; + power = power + ifu.power; // cout << "core = " << // power.readOp.dynamic*clockRate << " W" << endl; } @@ -229,17 +239,17 @@ void Core::computeEnergy(bool is_tdp) { // cout << "core = " << // power.readOp.dynamic*clockRate << " W" << endl; } - if (mmu->exist) { + if (mmu.exist) { set_pppm(pppm_t, coredynp.num_pipelines / num_units * (0.5 + 0.5 * coredynp.LSU_duty_cycle), coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - mmu->power = mmu->power + corepipe.power * pppm_t; + mmu.power = mmu.power + corepipe.power * pppm_t; // cout << "MMU = " << - // mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; + // mmu.power.readOp.dynamic*clockRate << " W" << endl; + power = power + mmu.power; // cout << "core = " << // power.readOp.dynamic*clockRate << " W" << endl; } @@ -248,21 +258,21 @@ void Core::computeEnergy(bool is_tdp) { if (XML->sys.Private_L2) { - l2cache->computeStaticPower(true); - set_pppm(pppm_t, l2cache->cachep.clockRate / clockRate, 1, 1, 1); - // l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power * pppm_t; + l2cache.computeStaticPower(true); + set_pppm(pppm_t, l2cache.cachep.clockRate / clockRate, 1, 1, 1); + // l2cache.power = l2cache.power*pppm_t; + power = power + l2cache.power * pppm_t; } } else { - ifu->computeDynamicPower(is_tdp); + ifu.computeDynamicPower(is_tdp); lsu.computeDynamicPower(is_tdp); - mmu->computeDynamicPower(is_tdp); + mmu.computeDynamicPower(is_tdp); exu.computeDynamicPower(is_tdp); if (coredynp.core_ty == OOO) { num_units = 5.0; - rnu->computeStaticPower(is_tdp); + rnu.computeStaticPower(is_tdp); if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; @@ -274,16 +284,16 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / 
num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - if (rnu->exist) { - rnu->rt_power = rnu->rt_power + corepipe.power * pppm_t; + if (rnu.exist) { + rnu.rt_power = rnu.rt_power + corepipe.power * pppm_t; - rt_power = rt_power + rnu->rt_power; + rt_power = rt_power + rnu.rt_power; } } else { num_units = 4.0; } - if (ifu->exist) { + if (ifu.exist) { if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.IFU_duty_cycle * XML->sys.total_cycles * @@ -297,8 +307,8 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - ifu->rt_power = ifu->rt_power + corepipe.power * pppm_t; - rt_power = rt_power + ifu->rt_power; + ifu.rt_power = ifu.rt_power + corepipe.power * pppm_t; + rt_power = rt_power + ifu.rt_power; } if (lsu.exist) { if (XML->sys.homogeneous_cores == 1) { @@ -335,7 +345,7 @@ void Core::computeEnergy(bool is_tdp) { exu.rt_power = exu.rt_power + corepipe.power * pppm_t; rt_power = rt_power + exu.rt_power; } - if (mmu->exist) { + if (mmu.exist) { if (XML->sys.homogeneous_cores == 1) { rtp_pipeline_coe = coredynp.pipeline_duty_cycle * (0.5 + 0.5 * coredynp.LSU_duty_cycle) * @@ -350,18 +360,18 @@ void Core::computeEnergy(bool is_tdp) { coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units, coredynp.num_pipelines / num_units); - mmu->rt_power = mmu->rt_power + corepipe.power * pppm_t; - rt_power = rt_power + mmu->rt_power; + mmu.rt_power = mmu.rt_power + corepipe.power * pppm_t; + rt_power = rt_power + mmu.rt_power; } rt_power = rt_power + undiffCore.power; // cout << "EXE = " << exu.power.readOp.dynamic*clockRate << " W" //<< endl; if (XML->sys.Private_L2) { - l2cache->computeStaticPower(); - // set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - // l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; + l2cache.computeStaticPower(); + 
// set_pppm(pppm_t,1/l2cache.cachep.executionTime, 1,1,1); + // l2cache.rt_power = l2cache.rt_power*pppm_t; + rt_power = rt_power + l2cache.rt_power; } } } @@ -394,59 +404,59 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << "Runtime Dynamic = " << rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; - if (ifu->exist) { + if (ifu.exist) { cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << ifu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << ifu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << ifu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? ifu->power.readOp.longer_channel_leakage - : ifu->power.readOp.leakage) + << (long_channel ? ifu.power.readOp.longer_channel_leakage + : ifu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? ifu->power.readOp.power_gated_with_long_channel_leakage - : ifu->power.readOp.power_gated_leakage) + ? 
ifu.power.readOp.power_gated_with_long_channel_leakage + : ifu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << ifu.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << ifu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << ifu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - ifu->displayEnergy(indent + 4, plevel, is_tdp); + ifu.displayEnergy(indent + 4, plevel, is_tdp); } } if (coredynp.core_ty == OOO) { - if (rnu->exist) { + if (rnu.exist) { cout << indent_str << "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << rnu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << rnu->power.readOp.dynamic * clockRate + << "Peak Dynamic = " << rnu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? rnu->power.readOp.longer_channel_leakage - : rnu->power.readOp.leakage) + << (long_channel ? rnu.power.readOp.longer_channel_leakage + : rnu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? rnu->power.readOp.power_gated_with_long_channel_leakage - : rnu->power.readOp.power_gated_leakage) + ? 
rnu.power.readOp.power_gated_with_long_channel_leakage + : rnu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next - << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" + << "Gate Leakage = " << rnu.power.readOp.gate_leakage << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << rnu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << rnu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - rnu->display(indent + 4, plevel, is_tdp); + rnu.display(indent + 4, plevel, is_tdp); } } } @@ -477,28 +487,28 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { lsu.display(indent + 4, plevel, is_tdp); } } - if (mmu->exist) { + if (mmu.exist) { cout << indent_str << "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() * 1e-6 + cout << indent_str_next << "Area = " << mmu.area.get_area() * 1e-6 << " mm^2" << endl; cout << indent_str_next - << "Peak Dynamic = " << mmu->power.readOp.dynamic * clockRate << " W" + << "Peak Dynamic = " << mmu.power.readOp.dynamic * clockRate << " W" << endl; cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel ? mmu->power.readOp.longer_channel_leakage - : mmu->power.readOp.leakage) + << (long_channel ? mmu.power.readOp.longer_channel_leakage + : mmu.power.readOp.leakage) << " W" << endl; if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = " << (long_channel - ? mmu->power.readOp.power_gated_with_long_channel_leakage - : mmu->power.readOp.power_gated_leakage) + ? 
mmu.power.readOp.power_gated_with_long_channel_leakage + : mmu.power.readOp.power_gated_leakage) << " W" << endl; cout << indent_str_next << "Runtime Dynamic = " - << mmu->rt_power.readOp.dynamic / executionTime << " W" << endl; + << mmu.rt_power.readOp.dynamic / executionTime << " W" << endl; cout << endl; if (plevel > 2) { - mmu->displayEnergy(indent + 4, plevel, is_tdp); + mmu.displayEnergy(indent + 4, plevel, is_tdp); } } if (exu.exist) { @@ -552,17 +562,17 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // } if (XML->sys.Private_L2) { - l2cache->display(4, true); + l2cache.display(4, true); } } else { // cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = //" - //<< ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; cout + //<< ifu.rt_power.readOp.dynamic*clockRate << " W" << endl; cout //<< indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " - // << ifu->rt_power.readOp.leakage <<" W" << endl; cout << + // << ifu.rt_power.readOp.leakage <<" W" << endl; cout << // indent_str_next << "Instruction Fetch Unit Gate Leakage = " << - // ifu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // ifu.rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next //<< "Load Store Unit Peak Dynamic = " << // lsu.rt_power.readOp.dynamic*clockRate << " W" << endl; cout @@ -572,11 +582,11 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { // << "Load Store Unit Gate Leakage = " << // lsu.rt_power.readOp.gate_leakage //<< " W" << endl; cout << indent_str_next << "Memory Management Unit - // Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << + // Peak Dynamic = " << mmu.rt_power.readOp.dynamic*clockRate << " W" << // endl; cout << indent_str_next << "Memory Management Unit Subthreshold - // Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; cout + // Leakage = " << mmu.rt_power.readOp.leakage << " W" << endl; cout // << indent_str_next << "Memory 
Management Unit Gate Leakage = " << - // mmu->rt_power.readOp.gate_leakage << " W" << endl; cout << + // mmu.rt_power.readOp.gate_leakage << " W" << endl; cout << // indent_str_next << "Execution Unit Peak Dynamic = " << // exu.rt_power.readOp.dynamic*clockRate << " W" << endl; cout // << indent_str_next << "Execution Unit Subthreshold Leakage = " << @@ -590,23 +600,6 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { Core ::~Core() { - if (ifu) { - delete ifu; - ifu = 0; - } - if (rnu) { - delete rnu; - rnu = 0; - } - if (mmu) { - delete mmu; - mmu = 0; - } - - if (l2cache) { - delete l2cache; - l2cache = 0; - } } void Core::set_core_param() { diff --git a/src/core/core.h b/src/core/core.h index ff83c1a..8602a3f 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -54,22 +54,27 @@ class Core : public Component { InputParameter interface_ip; double clockRate, executionTime; double scktRatio, chip_PR_overhead, macro_PR_overhead; - InstFetchU *ifu; + InstFetchU ifu; LoadStoreU lsu; - MemManU *mmu; + MemManU mmu; EXECU exu; - RENAMINGU *rnu; + RENAMINGU rnu; Pipeline corepipe; UndiffCore undiffCore; - SharedCache *l2cache; + SharedCache l2cache; CoreDynParam coredynp; + + double pipeline_area_per_unit; + // full_decoder inst_decoder; // clock_network clockNetwork; - Core(const ParseXML *XML_interface, + Core(){}; + void set_params(const ParseXML *XML_interface, int ithCore_, InputParameter *interface_ip_); + void computeArea(); void set_core_param(); - void computeEnergy(bool is_tdp = true); + void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~Core(); }; diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 989b347..0d55f14 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -438,7 +438,7 @@ void InstFetchU::computeDynamicPower(bool is_tdp) { if (!exist) return; if (!init_stats) { - std::cerr << "[ InstFetchU ] Error: must set params before calling
" + std::cerr << "[ InstFetchU ] Error: must set stats before calling " "computeDynamicPower()\n"; exit(1); } diff --git a/src/processor.cc b/src/processor.cc index 808ccba..b4a8300 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -108,9 +108,11 @@ void Processor::init(const ParseXML *XML, bool cp) { } for (i = 0; i < numCore; i++) { - cores.push_back(new Core(XML, i, &interface_ip)); - cores[i]->computeEnergy(); - cores[i]->computeEnergy(false); + cores.push_back(new Core()); + cores[i]->set_params(XML, i, &interface_ip); + cores[i]->computeArea(); + cores[i]->computeDynamicPower(); + cores[i]->computeDynamicPower(false); if (procdynp.homoCore) { core.area.set_area(core.area.get_area() + cores[i]->area.get_area() * procdynp.numCore); From 1d8081e3626f119ff2c61bda7104ea48ffa2a991 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 24 Jun 2020 06:32:06 -0500 Subject: [PATCH 55/59] Adding more fields to inst_decoder & predec_blk --- src/cacti/decoder/predec_blk.h | 29 +++++++++++++++++++++++++++++ src/logic/inst_decoder.cc | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/cacti/decoder/predec_blk.h b/src/cacti/decoder/predec_blk.h index d515aad..eb2eacf 100644 --- a/src/cacti/decoder/predec_blk.h +++ b/src/cacti/decoder/predec_blk.h @@ -108,6 +108,35 @@ class PredecBlk : public Component { template void serialize(Archive &ar, const unsigned int version) { + ar &exist; + ar &number_input_addr_bits; + ar &C_ld_predec_blk_out; + ar &R_wire_predec_blk_out; + ar &branch_effort_nand2_gate_output; + ar &branch_effort_nand3_gate_output; + ar &flag_two_unique_paths; + ar &flag_L2_gate; + ar &number_inputs_L1_gate; + ar &number_gates_L1_nand2_path; + ar &number_gates_L1_nand3_path; + ar &number_gates_L2; + ar &min_number_gates_L1; + ar &min_number_gates_L2; + ar &num_L1_active_nand2_path; + ar &num_L1_active_nand3_path; + ar &w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; + ar &w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; + ar &w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; + ar
&w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; + ar &w_L2_n[MAX_NUMBER_GATES_STAGE]; + ar &w_L2_p[MAX_NUMBER_GATES_STAGE]; + ar &delay_nand2_path; + ar &delay_nand3_path; + ar &power_nand2_path; + ar &power_nand3_path; + ar &power_L2; + + ar &is_dram_; Component::serialize(ar, version); } }; diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index d3a8810..d40ee6f 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -235,4 +235,4 @@ inst_decoder::~inst_decoder() { delete pre_dec.blk2; delete pre_dec.drv1; delete pre_dec.drv2; -} \ No newline at end of file +} From dc7c5daf5d8d52e904dd47b4bb131e7c81d7ea27 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 24 Jun 2020 06:49:43 -0500 Subject: [PATCH 56/59] merge: feature/refactor All unit tests passing, time to add serialization to core and test --- unit_test/unit_test.sh | 48 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/unit_test/unit_test.sh b/unit_test/unit_test.sh index fc2eb78..df62da0 100755 --- a/unit_test/unit_test.sh +++ b/unit_test/unit_test.sh @@ -83,36 +83,36 @@ print_info "#########################################################" print_info "#########################################################" print_info "# Unit Test Serialization 1 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_1 \ -# --output=./output/serialization_test_1 \ -# --golden=./golden/serialization_test_1 \ -# --serial=True \ -# --nthreads=$NTHREADS +./unit_test.py \ + --input=./input/serialization_test_1 \ + --output=./output/serialization_test_1 \ + --golden=./golden/serialization_test_1 \ + --serial=True \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 2 #" print_info "#########################################################" -#./unit_test.py \ -# 
--input=./input/serialization_test_2 \ -# --output=./output/serialization_test_2 \ -# --golden=./golden/serialization_test_2 \ -# --serial=True \ -# --nthreads=$NTHREADS +./unit_test.py \ + --input=./input/serialization_test_2 \ + --output=./output/serialization_test_2 \ + --golden=./golden/serialization_test_2 \ + --serial=True \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 3 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_3 \ -# --output=./output/serialization_test_3 \ -# --golden=./golden/serialization_test_3 \ -# --serial=True \ -# --nthreads=$NTHREADS +./unit_test.py \ + --input=./input/serialization_test_3 \ + --output=./output/serialization_test_3 \ + --golden=./golden/serialization_test_3 \ + --serial=True \ + --nthreads=$NTHREADS print_info "#########################################################" print_info "# Unit Test Serialization 4 #" print_info "#########################################################" -#./unit_test.py \ -# --input=./input/serialization_test_4 \ -# --output=./output/serialization_test_4 \ -# --golden=./golden/serialization_test_4 \ -# --serial=True \ -# --nthreads=$NTHREADS +./unit_test.py \ + --input=./input/serialization_test_4 \ + --output=./output/serialization_test_4 \ + --golden=./golden/serialization_test_4 \ + --serial=True \ + --nthreads=$NTHREADS From d658b0479a9d80141989162668e486e55719dda6 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 24 Jun 2020 10:29:50 -0500 Subject: [PATCH 57/59] ExecU ALUs not calculating power properly --- src/cacti/decoder/predec_blk.h | 12 ++++---- src/core/branch_predictor.h | 3 +- src/core/core.cc | 40 +++++++++++++------------ src/core/core.h | 36 ++++++++++++++++++++--- src/core/exec_unit.h | 5 ++-- src/core/instfetch.h | 3 +- src/core/loadstore.cc | 46 ++++++++++++++++++++++++++++- src/core/loadstore.h | 3 +- 
src/core/mmu.cc | 5 ++-- src/core/mmu.h | 3 +- src/core/regfile.h | 3 +- src/core/renaming_unit.h | 4 ++- src/core/scheduler.h | 3 +- src/logic/functional_unit.h | 5 +--- src/logic/inst_decoder.h | 15 +++++----- src/logic/pipeline.cc | 21 +++++++------- src/logic/pipeline.h | 8 ++--- src/logic/undiff_core.cc | 41 ++++++++++++++------------ src/logic/undiff_core.h | 14 ++++----- src/processor.cc | 53 +++++++++++++++++----------------- src/processor.h | 3 +- 21 files changed, 206 insertions(+), 120 deletions(-) diff --git a/src/cacti/decoder/predec_blk.h b/src/cacti/decoder/predec_blk.h index eb2eacf..219a41c 100644 --- a/src/cacti/decoder/predec_blk.h +++ b/src/cacti/decoder/predec_blk.h @@ -124,12 +124,12 @@ class PredecBlk : public Component { ar &min_number_gates_L2; ar &num_L1_active_nand2_path; ar &num_L1_active_nand3_path; - ar &w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; - ar &w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; - ar &w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; - ar &w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; - ar &w_L2_n[MAX_NUMBER_GATES_STAGE]; - ar &w_L2_p[MAX_NUMBER_GATES_STAGE]; + ar &w_L1_nand2_n; + ar &w_L1_nand2_p; + ar &w_L1_nand3_n; + ar &w_L1_nand3_p; + ar &w_L2_n; + ar &w_L2_p; ar &delay_nand2_path; ar &delay_nand3_path; ar &power_nand2_path; diff --git a/src/core/branch_predictor.h b/src/core/branch_predictor.h index d96ab8d..28d84d9 100644 --- a/src/core/branch_predictor.h +++ b/src/core/branch_predictor.h @@ -97,7 +97,8 @@ class BranchPredictor : public Component { ar &chooser; ar &RAS; ar &exist; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/core.cc b/src/core/core.cc index 84ec674..c995e35 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -45,13 +45,16 @@ //#include "globalvar.h" void Core::set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_){ + int ithCore_, + InputParameter *interface_ip_, + bool cp) { /* * initialize, compute and
optimize individual components. */ - XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; + XML = XML_interface; + ithCore = ithCore_; + interface_ip = *interface_ip_; bool exit_flag = true; @@ -62,35 +65,39 @@ void Core::set_params(const ParseXML *XML_interface, if (XML->sys.Private_L2) { l2cache.set_params(XML, ithCore, &interface_ip); + l2cache.set_stats(XML); } clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; ifu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); + ifu.set_stats(XML); lsu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); - lsu.computeArea(); //done on purpose because the exu unit is dependent on the lsu.lsq_height which is set in compute area + if (!cp) { + lsu.computeArea(); // done on purpose because the exu unit is dependent on + // the lsu.lsq_height which is set in compute area + } mmu.set_params(XML, ithCore, &interface_ip, coredynp); + mmu.set_stats(XML); exu.set_params( XML, ithCore, &interface_ip, lsu.lsq_height, coredynp, exit_flag); - + exu.set_stats(XML); + undiffCore.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); // undiffCore.computeArea(); // undiffCore.computeDynamicPower(); - + if (coredynp.core_ty == OOO) { rnu.set_params(XML, ithCore, &interface_ip, coredynp); + rnu.set_stats(XML); } corepipe.set_params(&interface_ip, coredynp); - - - // area.set_area(area.get_area()+ corepipe.area.get_area()); - // //clock power // clockNetwork.init_wire_external(is_default, &interface_ip); // clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
@@ -100,14 +107,14 @@ void Core::set_params(const ParseXML *XML_interface, // clockNetwork.optimize_wire(); } -void Core::computeArea(){ +void Core::computeArea() { if (coredynp.core_ty == OOO) { rnu.computeArea(); rnu.set_stats(XML); } corepipe.computeArea(); - if (coredynp.core_ty == OOO) { + if (coredynp.core_ty == OOO) { pipeline_area_per_unit = (corepipe.area.get_area() * coredynp.num_pipelines) / 5.0; if (rnu.exist) { @@ -136,7 +143,7 @@ void Core::computeArea(){ ifu.area.set_area(ifu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + ifu.area.get_area()); } - + if (lsu.exist) { lsu.area.set_area(lsu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + lsu.area.get_area()); @@ -150,7 +157,7 @@ void Core::computeArea(){ } exu.computeArea(); exu.set_stats(XML); - exu.computeStaticPower(); + exu.computeStaticPower(); if (exu.exist) { exu.area.set_area(exu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + exu.area.get_area()); @@ -162,7 +169,6 @@ void Core::computeArea(){ } } - void Core::computeDynamicPower(bool is_tdp) { /* * When computing TDP, power = energy_per_cycle (the value computed in this @@ -598,9 +604,7 @@ void Core::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } } -Core ::~Core() { - -} +Core ::~Core() {} void Core::set_core_param() { coredynp.opt_local = XML->sys.core[ithCore].opt_local; diff --git a/src/core/core.h b/src/core/core.h index 8602a3f..41fa9bb 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -52,8 +52,11 @@ class Core : public Component { const ParseXML *XML; int ithCore; InputParameter interface_ip; - double clockRate, executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; + double clockRate; + double executionTime; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; InstFetchU ifu; LoadStoreU lsu; MemManU mmu; @@ -70,13 +73,38 @@ class Core : public Component { // clock_network clockNetwork; Core(){}; void 
set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_); + int ithCore_, + InputParameter *interface_ip_, + bool cp = false); void computeArea(); void set_core_param(); void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~Core(); + + // Serialization + friend class boost::serialization::access; + + template + void serialize(Archive &ar, const unsigned int version) { + ar &clockRate; + ar &executionTime; + ar &clockRate; + ar &executionTime; + ar &scktRatio; + ar &chip_PR_overhead; + ar &macro_PR_overhead; + ar &ifu; + ar &lsu; + ar &mmu; + ar &exu; + ar &rnu; + ar &corepipe; + ar &undiffCore; + ar &l2cache; + ar &pipeline_area_per_unit; + Component::serialize(ar, version); + } }; #endif /* CORE_H_ */ diff --git a/src/core/exec_unit.h b/src/core/exec_unit.h index 79e80af..7f9a886 100644 --- a/src/core/exec_unit.h +++ b/src/core/exec_unit.h @@ -97,7 +97,7 @@ class EXECU : public Component { template void serialize(Archive &ar, const unsigned int version) { - ar &bypass; + ar &bypass.area; ar &int_bypass; ar &intTagBypass; ar &int_mul_bypass; @@ -117,7 +117,8 @@ class EXECU : public Component { ar &clockRate; ar &exist; ar &ithCore; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/instfetch.h b/src/core/instfetch.h index dffda2b..f645899 100644 --- a/src/core/instfetch.h +++ b/src/core/instfetch.h @@ -100,7 +100,8 @@ class InstFetchU : public Component { ar &executionTime; ar &clockRate; ar &ithCore; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/loadstore.cc b/src/core/loadstore.cc index e553086..46dbddf 100644 --- a/src/core/loadstore.cc +++ b/src/core/loadstore.cc @@ -342,26 +342,70 @@ void LoadStoreU::computeArea() { dcache.caches.local_result.area); area.set_area(area.get_area() +
dcache.caches.local_result.area); + if (dcache.caches.local_result.tag_array2 != nullptr) { + dcache.caches.local_result.ta2_power = + dcache.caches.local_result.tag_array2->power; + } + if (dcache.caches.local_result.data_array2 != nullptr) { + dcache.caches.local_result.da2_power = + dcache.caches.local_result.data_array2->power; + } + // dCache controllers // miss buffer dcache.missb.computeArea(); dcache.area.set_area(dcache.area.get_area() + dcache.missb.local_result.area); area.set_area(area.get_area() + dcache.missb.local_result.area); + if (dcache.missb.local_result.tag_array2 != nullptr) { + dcache.missb.local_result.ta2_power = + dcache.missb.local_result.tag_array2->power; + } + if (dcache.missb.local_result.data_array2 != nullptr) { + dcache.missb.local_result.da2_power = + dcache.missb.local_result.data_array2->power; + } + // fill buffer dcache.ifb.computeArea(); dcache.area.set_area(dcache.area.get_area() + dcache.ifb.local_result.area); area.set_area(area.get_area() + dcache.ifb.local_result.area); + if (dcache.ifb.local_result.tag_array2 != nullptr) { + dcache.ifb.local_result.ta2_power = + dcache.ifb.local_result.tag_array2->power; + } + if (dcache.ifb.local_result.data_array2 != nullptr) { + dcache.ifb.local_result.da2_power = + dcache.ifb.local_result.data_array2->power; + } + dcache.prefetchb.computeArea(); dcache.area.set_area(dcache.area.get_area() + dcache.prefetchb.local_result.area); area.set_area(area.get_area() + dcache.prefetchb.local_result.area); + if (dcache.prefetchb.local_result.tag_array2 != nullptr) { + dcache.prefetchb.local_result.ta2_power = + dcache.prefetchb.local_result.tag_array2->power; + } + if (dcache.prefetchb.local_result.data_array2 != nullptr) { + dcache.prefetchb.local_result.da2_power = + dcache.prefetchb.local_result.data_array2->power; + } + if (cache_p == Write_back) { dcache.wbb.computeArea(); dcache.area.set_area(dcache.area.get_area() + dcache.wbb.local_result.area); area.set_area(area.get_area() + 
dcache.wbb.local_result.area); + if (dcache.wbb.local_result.tag_array2 != nullptr) { + dcache.wbb.local_result.ta2_power = + dcache.wbb.local_result.tag_array2->power; + } + if (dcache.wbb.local_result.data_array2 != nullptr) { + dcache.wbb.local_result.da2_power = + dcache.wbb.local_result.data_array2->power; + } } /* @@ -519,7 +563,7 @@ void LoadStoreU::computeDynamicPower(bool is_tdp) { .dynamic + // assuming D cache is in the fast model which read // tag and data together dcache.caches.stats_t.writeAc.miss * - dcache.caches.local_result.tag_array2->power.readOp.dynamic + + dcache.caches.local_result.ta2_power.readOp.dynamic + dcache.caches.stats_t.writeAc.access * dcache.caches.local_result.power.writeOp.dynamic); diff --git a/src/core/loadstore.h b/src/core/loadstore.h index 4d34809..2e18bc3 100644 --- a/src/core/loadstore.h +++ b/src/core/loadstore.h @@ -85,7 +85,8 @@ class LoadStoreU : public Component { ar &dcache; ar &LSQ; ar &LoadQ; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/mmu.cc b/src/core/mmu.cc index 792ac03..9e4525f 100644 --- a/src/core/mmu.cc +++ b/src/core/mmu.cc @@ -174,10 +174,11 @@ void MemManU::computeStaticPower() { } void MemManU::computeDynamicPower(bool is_tdp) { - if (!exist) + if (!exist) { return; + } if (!init_stats) { - std::cerr << "[ MCFrontEnd ] Error: must set params before calling " + std::cerr << "[ MemManU ] Error: must set params before calling " "computeDynamicPower()\n"; exit(1); } diff --git a/src/core/mmu.h b/src/core/mmu.h index 22b3a78..6fb4163 100644 --- a/src/core/mmu.h +++ b/src/core/mmu.h @@ -89,7 +89,8 @@ class MemManU : public Component { ar &itlb; ar &dtlb; ar &exist; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/regfile.h b/src/core/regfile.h index b2627d9..dc3d644 100644 --- a/src/core/regfile.h +++ b/src/core/regfile.h @@ -89,7 +89,8 @@ class RegFU :
public Component { ar &executionTime; ar &clockRate; ar &ithCore; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/renaming_unit.h b/src/core/renaming_unit.h index 2ed1d36..c7ac344 100644 --- a/src/core/renaming_unit.h +++ b/src/core/renaming_unit.h @@ -105,7 +105,9 @@ class RENAMINGU : public Component { ar &idcl; ar &fdcl; ar &exist; - Component::serialize(ar, version); + ar &set_area; + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/scheduler.h b/src/core/scheduler.h index 7136916..1879345 100644 --- a/src/core/scheduler.h +++ b/src/core/scheduler.h @@ -100,7 +100,8 @@ class SchedulerU : public Component { ar &ROB; ar &instruction_selection; ar &exist; - Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/logic/functional_unit.h b/src/logic/functional_unit.h index fc7eb74..896ceea 100644 --- a/src/logic/functional_unit.h +++ b/src/logic/functional_unit.h @@ -109,11 +109,8 @@ class FunctionalUnit : public Component { template void serialize(Archive &ar, const unsigned int version) { - ar &power_t; - ar &stats_t; - ar &tdp_stats; - ar &rtp_stats; ar &area_t; + ar &set_area; Component::serialize(ar, version); } }; diff --git a/src/logic/inst_decoder.h b/src/logic/inst_decoder.h index 54a3abf..c199c72 100644 --- a/src/logic/inst_decoder.h +++ b/src/logic/inst_decoder.h @@ -80,13 +80,14 @@ class inst_decoder : public Component { ar &local_result; ar &device_ty; ar &core_ty; - ar &final_dec; - ar &pre_dec; - ar &predec_blk1; - ar &predec_blk2; - ar &predec_blk_drv1; - ar &predec_blk_drv2; - Component::serialize(ar, version); + // ar &final_dec; + // ar &pre_dec; + // ar &predec_blk1; + // ar &predec_blk2; + // ar &predec_blk_drv1; + // ar &predec_blk_drv2; + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/logic/pipeline.cc b/src/logic/pipeline.cc index
9885672..0135b4c 100644 --- a/src/logic/pipeline.cc +++ b/src/logic/pipeline.cc @@ -34,14 +34,16 @@ #include "dff_cell.h" void Pipeline::set_params(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, - enum Device_ty device_ty_, - bool _is_core_pipeline, - bool _is_default) -{ - l_ip=*configure_interface; coredynp=dyn_p_; device_ty=device_ty_; - is_core_pipeline=_is_core_pipeline; is_default=_is_default; - num_piperegs=0.0; + const CoreDynParam &dyn_p_, + enum Device_ty device_ty_, + bool _is_core_pipeline, + bool _is_default) { + l_ip = *configure_interface; + coredynp = dyn_p_; + device_ty = device_ty_; + is_core_pipeline = _is_core_pipeline; + is_default = _is_default; + num_piperegs = 0.0; local_result = init_interface(&l_ip); if (!coredynp.Embedded) process_ind = true; @@ -59,7 +61,7 @@ void Pipeline::set_params(const InputParameter *configure_interface, load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); } -void Pipeline::computeArea(){ +void Pipeline::computeArea() { compute_stage_vector(); DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); pipe_reg.compute_DFF_cell(); @@ -103,7 +105,6 @@ void Pipeline::computeArea(){ area.set_area(area.get_area() * macro_layout_overhead); } - void Pipeline::compute_stage_vector() { double num_stages, tot_stage_vector, per_stage_vector; int opcode_length = diff --git a/src/logic/pipeline.h b/src/logic/pipeline.h index 924fd92..7a261b2 100644 --- a/src/logic/pipeline.h +++ b/src/logic/pipeline.h @@ -55,10 +55,10 @@ class Pipeline : public Component { public: Pipeline(){}; void set_params(const InputParameter *configure_interface, - const CoreDynParam &dyn_p_, - enum Device_ty device_ty_ = Core_device, - bool _is_core_pipeline = true, - bool _is_default = true); + const CoreDynParam &dyn_p_, + enum Device_ty device_ty_ = Core_device, + bool _is_core_pipeline = true, + bool _is_default = true); InputParameter l_ip; uca_org_t local_result; CoreDynParam coredynp; diff --git 
a/src/logic/undiff_core.cc b/src/logic/undiff_core.cc index 22dab0b..3442083 100644 --- a/src/logic/undiff_core.cc +++ b/src/logic/undiff_core.cc @@ -32,31 +32,36 @@ #include "undiff_core.h" void UndiffCore::set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_, - bool embedded_) + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_, + bool embedded_) // is_default(_is_default) -{ - XML=XML_interface; ithCore=ithCore_; interface_ip=*interface_ip_; - coredynp=dyn_p_; core_ty=coredynp.core_ty; embedded=XML->sys.Embedded; - pipeline_stage=coredynp.pipeline_stages; - num_hthreads=coredynp.num_hthreads; issue_width=coredynp.issueW; - exist=exist_; +{ + XML = XML_interface; + ithCore = ithCore_; + interface_ip = *interface_ip_; + coredynp = dyn_p_; + core_ty = coredynp.core_ty; + embedded = XML->sys.Embedded; + pipeline_stage = coredynp.pipeline_stages; + num_hthreads = coredynp.num_hthreads; + issue_width = coredynp.issueW; + exist = exist_; if (!exist) - return; + return; } - void UndiffCore::computeArea(){ - double undifferentiated_core = 0; +void UndiffCore::computeArea() { + double undifferentiated_core = 0; double core_tx_density = 0; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); double undifferentiated_core_coe; // XML_interface=_XML_interface; uca_org_t result2; result2 = init_interface(&interface_ip); - // Compute undifferentiated core area at 90nm. + // Compute undifferentiated core area at 90nm. 
if (embedded == false) { // Based on the results of polynomial/log curve fitting based on // undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, @@ -94,7 +99,7 @@ void UndiffCore::set_params(const ParseXML *XML_interface, // undifferentiated_core = 3*1e6; // undifferentiated_core *= // g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = undifferentiated_core * + power.readOp.leakage = undifferentiated_core * (core_tx_density)*cmos_Isub_leakage( 5 * g_tp.min_w_nmos_, 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r, @@ -121,7 +126,6 @@ void UndiffCore::set_params(const ParseXML *XML_interface, area.set_area(undifferentiated_core); - scktRatio = g_tp.sckt_co_eff; power.readOp.dynamic *= scktRatio; power.writeOp.dynamic *= scktRatio; @@ -154,8 +158,7 @@ void UndiffCore::set_params(const ParseXML *XML_interface, // // std::cout< class UndiffCore : public Component { -public: - +public: UndiffCore(){}; void set_params(const ParseXML *XML_interface, - int ithCore_, - InputParameter *interface_ip_, - const CoreDynParam &dyn_p_, - bool exist_ = true, - bool embedded_ = false); + int ithCore_, + InputParameter *interface_ip_, + const CoreDynParam &dyn_p_, + bool exist_ = true, + bool embedded_ = false); const ParseXML *XML; int ithCore; InputParameter interface_ip; @@ -69,7 +68,6 @@ class UndiffCore : public Component { bool is_default; void computeArea(); - void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); ~UndiffCore(){}; bool exist; diff --git a/src/processor.cc b/src/processor.cc index b4a8300..59c6b44 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -108,26 +108,30 @@ void Processor::init(const ParseXML *XML, bool cp) { } for (i = 0; i < numCore; i++) { - cores.push_back(new Core()); - cores[i]->set_params(XML, i, &interface_ip); - cores[i]->computeArea(); - cores[i]->computeDynamicPower(); - cores[i]->computeDynamicPower(false); + if (!cp) { + 
cores.push_back(Core()); + } + cores[i].set_params(XML, i, &interface_ip, cp); + if (!cp) { + cores[i].computeArea(); + } + cores[i].computeDynamicPower(); + cores[i].computeDynamicPower(false); if (procdynp.homoCore) { core.area.set_area(core.area.get_area() + - cores[i]->area.get_area() * procdynp.numCore); + cores[i].area.get_area() * procdynp.numCore); set_pppm(pppm_t, - cores[i]->clockRate * procdynp.numCore, + cores[i].clockRate * procdynp.numCore, procdynp.numCore, procdynp.numCore, procdynp.numCore); - core.power = core.power + cores[i]->power * pppm_t; + core.power = core.power + cores[i].power * pppm_t; set_pppm(pppm_t, - 1 / cores[i]->executionTime, + 1 / cores[i].executionTime, procdynp.numCore, procdynp.numCore, procdynp.numCore); - core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t; + core.rt_power = core.rt_power + cores[i].rt_power * pppm_t; area.set_area(area.get_area() + core.area.get_area()); // placement and routing overhead is // 10%, core scales worse than cache @@ -135,20 +139,20 @@ void Processor::init(const ParseXML *XML, bool cp) { power = power + core.power; rt_power = rt_power + core.rt_power; } else { - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); + core.area.set_area(core.area.get_area() + cores[i].area.get_area()); area.set_area( area.get_area() + - cores[i]->area.get_area()); // placement and routing overhead is 10%, - // core scales worse than cache 40% is - // accumulated from 90 to 22nm + cores[i].area.get_area()); // placement and routing overhead is 10%, + // core scales worse than cache 40% is + // accumulated from 90 to 22nm - set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1); - core.power = core.power + cores[i]->power * pppm_t; - power = power + cores[i]->power * pppm_t; + set_pppm(pppm_t, cores[i].clockRate, 1, 1, 1); + core.power = core.power + cores[i].power * pppm_t; + power = power + cores[i].power * pppm_t; - set_pppm(pppm_t, 1 / cores[i]->executionTime, 1, 1, 1); - core.rt_power = 
core.rt_power + cores[i]->rt_power * pppm_t; - rt_power = rt_power + cores[i]->rt_power * pppm_t; + set_pppm(pppm_t, 1 / cores[i].executionTime, 1, 1, 1); + core.rt_power = core.rt_power + cores[i].rt_power * pppm_t; + rt_power = rt_power + cores[i].rt_power * pppm_t; } } @@ -992,7 +996,7 @@ void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { << std::endl; if (plevel > 1) { for (i = 0; i < numCore; i++) { - cores[i]->displayEnergy(indent + 4, plevel, is_tdp); + cores[i].displayEnergy(indent + 4, plevel, is_tdp); std::cout << "**************************************************************" "***************************" @@ -1196,9 +1200,4 @@ void Processor::set_proc_param() { interface_ip.add_ecc_b_ = true; } -Processor::~Processor() { - while (!cores.empty()) { - delete cores.back(); - cores.pop_back(); - } -}; +Processor::~Processor(){}; diff --git a/src/processor.h b/src/processor.h index 1b8e93a..9f24e37 100644 --- a/src/processor.h +++ b/src/processor.h @@ -63,7 +63,7 @@ class Processor : public Component { private: const ParseXML *XML; - vector cores; + vector cores; vector l2array; vector l3array; vector l1dirarray; @@ -103,6 +103,7 @@ class Processor : public Component { template void serialize(Archive &ar, const unsigned int version) { + ar &cores; ar &l2array; ar &l3array; ar &l1dirarray; From ee61a7bc69a4fea91539fb9d707d30f3b82b1b3c Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 24 Jun 2020 12:25:05 -0500 Subject: [PATCH 58/59] serialization: Serialization Complete Passing all tests, no segmentation fault: --- src/core/core.h | 3 ++- src/core/instfetch.cc | 4 ++-- src/logic/functional_unit.cc | 12 +++++++++++- src/logic/inst_decoder.cc | 8 ++++---- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/core/core.h b/src/core/core.h index 41fa9bb..fa32425 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -103,7 +103,8 @@ class Core : public Component { ar &undiffCore; ar &l2cache; ar &pipeline_area_per_unit; - 
Component::serialize(ar, version); + ar &Component::area; + // Component::serialize(ar, version); } }; diff --git a/src/core/instfetch.cc b/src/core/instfetch.cc index 7286a6a..5083b3a 100644 --- a/src/core/instfetch.cc +++ b/src/core/instfetch.cc @@ -833,7 +833,7 @@ void InstFetchU::displayEnergy(uint32_t indent, int plevel, bool is_tdp) { } InstFetchU ::~InstFetchU() { - - if (!exist) + if (!exist) { return; + } } diff --git a/src/logic/functional_unit.cc b/src/logic/functional_unit.cc index 49c4b1f..3ac9e40 100644 --- a/src/logic/functional_unit.cc +++ b/src/logic/functional_unit.cc @@ -70,6 +70,17 @@ void FunctionalUnit::set_params(const ParseXML *XML, embedded = XML->sys.Embedded; clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; + if (fu_type == FPU) { + num_fu = coredynp.num_fpus; + } else if (fu_type == ALU) { + num_fu = coredynp.num_alus; + } else if (fu_type == MUL) { + num_fu = coredynp.num_muls; + } else { + std::cout << "[ FunctionalUnit ] Error: Unknown Functional Unit Type" + << std::endl; + exit(1); + } init_params = true; } @@ -83,7 +94,6 @@ void FunctionalUnit::set_stats(const ParseXML *XML) { void FunctionalUnit::computeArea() { if (embedded) { if (fu_type == FPU) { - num_fu = coredynp.num_fpus; // area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is // um^2 area_t = 4.47 * 1e6 * diff --git a/src/logic/inst_decoder.cc b/src/logic/inst_decoder.cc index d40ee6f..26b26bc 100644 --- a/src/logic/inst_decoder.cc +++ b/src/logic/inst_decoder.cc @@ -231,8 +231,8 @@ void inst_decoder::leakage_feedback(double temperature) { inst_decoder::~inst_decoder() { local_result.cleanup(); - delete pre_dec.blk1; - delete pre_dec.blk2; - delete pre_dec.drv1; - delete pre_dec.drv2; + // delete pre_dec.blk1; + // delete pre_dec.blk2; + // delete pre_dec.drv1; + // delete pre_dec.drv2; } From 2f9729970bbc0e614c0b5f08efb7257136570ee2 Mon Sep 17 00:00:00 2001 From: Ramakrishna Kanungo Date: Wed, 24 Jun 2020 12:45:08 -0500 Subject: 
[PATCH 59/59] organized code --- src/core/branch_predictor.cc | 2 +- src/core/core.cc | 37 ++++++++++++++++++++++++++---------- src/core/core.h | 1 + src/core/exec_unit.cc | 4 ++-- src/core/mmu.cc | 2 +- src/processor.cc | 1 + 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/core/branch_predictor.cc b/src/core/branch_predictor.cc index 199d25b..9e90bec 100644 --- a/src/core/branch_predictor.cc +++ b/src/core/branch_predictor.cc @@ -251,7 +251,7 @@ void BranchPredictor::computeDynamicPower(bool is_tdp) { if (!exist) return; if (!init_stats) { - std::cerr << "[ BranchPredictor ] Error: must set params before calling " + std::cerr << "[ BranchPredictor ] Error: must set stats before calling " "computeDynamicPower()\n"; exit(1); } diff --git a/src/core/core.cc b/src/core/core.cc index c995e35..1c6632b 100644 --- a/src/core/core.cc +++ b/src/core/core.cc @@ -65,13 +65,11 @@ void Core::set_params(const ParseXML *XML_interface, if (XML->sys.Private_L2) { l2cache.set_params(XML, ithCore, &interface_ip); - l2cache.set_stats(XML); } clockRate = coredynp.clockRate; executionTime = coredynp.executionTime; ifu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); - ifu.set_stats(XML); lsu.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); if (!cp) { @@ -79,11 +77,11 @@ void Core::set_params(const ParseXML *XML_interface, // the lsu.lsq_height which is set in compute area } mmu.set_params(XML, ithCore, &interface_ip, coredynp); - mmu.set_stats(XML); + //mmu.set_stats(XML); exu.set_params( XML, ithCore, &interface_ip, lsu.lsq_height, coredynp, exit_flag); - exu.set_stats(XML); + //exu.set_stats(XML); undiffCore.set_params(XML, ithCore, &interface_ip, coredynp, exit_flag); @@ -92,7 +90,7 @@ void Core::set_params(const ParseXML *XML_interface, if (coredynp.core_ty == OOO) { rnu.set_params(XML, ithCore, &interface_ip, coredynp); - rnu.set_stats(XML); + //rnu.set_stats(XML); } corepipe.set_params(&interface_ip, coredynp); @@ -107,10 +105,32 @@ 
void Core::set_params(const ParseXML *XML_interface, // clockNetwork.optimize_wire(); } +void Core::set_stats(const ParseXML *XML_interface){ + if (coredynp.core_ty == OOO) { + rnu.set_stats(XML); + } + + + + if (XML->sys.Private_L2) { + l2cache.set_stats(XML); + + } + + + ifu.set_stats(XML); + + + mmu.set_stats(XML); + + exu.set_stats(XML); + exu.computeStaticPower(); + +} + void Core::computeArea() { if (coredynp.core_ty == OOO) { rnu.computeArea(); - rnu.set_stats(XML); } corepipe.computeArea(); @@ -133,11 +153,10 @@ void Core::computeArea() { } if (XML->sys.Private_L2) { - l2cache.set_stats(XML); l2cache.computeArea(); area.set_area(area.get_area() + l2cache.area.get_area()); + } - ifu.set_stats(XML); ifu.computeArea(); if (ifu.exist) { ifu.area.set_area(ifu.area.get_area() + pipeline_area_per_unit); @@ -150,13 +169,11 @@ void Core::computeArea() { } mmu.computeArea(); - mmu.set_stats(XML); if (mmu.exist) { mmu.area.set_area(mmu.area.get_area() + pipeline_area_per_unit); area.set_area(area.get_area() + mmu.area.get_area()); } exu.computeArea(); - exu.set_stats(XML); exu.computeStaticPower(); if (exu.exist) { exu.area.set_area(exu.area.get_area() + pipeline_area_per_unit); diff --git a/src/core/core.h b/src/core/core.h index 41fa9bb..15a6173 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -77,6 +77,7 @@ class Core : public Component { InputParameter *interface_ip_, bool cp = false); void computeArea(); + void set_stats(const ParseXML *XML_interface); void set_core_param(); void computeDynamicPower(bool is_tdp = true); void displayEnergy(uint32_t indent = 0, int plevel = 100, bool is_tdp = true); diff --git a/src/core/exec_unit.cc b/src/core/exec_unit.cc index 7c24705..20162ea 100644 --- a/src/core/exec_unit.cc +++ b/src/core/exec_unit.cc @@ -455,8 +455,8 @@ void EXECU::computeArea() { } void EXECU::computeDynamicPower(bool is_tdp) { - if (!init_params) { - std::cerr << "[ EXECU ] Error: must set params before calling " + if (!init_stats) { + std::cerr << 
"[ EXECU ] Error: must set stats before calling " "computeStaticPower()\n"; exit(1); } diff --git a/src/core/mmu.cc b/src/core/mmu.cc index 9e4525f..e71edc4 100644 --- a/src/core/mmu.cc +++ b/src/core/mmu.cc @@ -178,7 +178,7 @@ void MemManU::computeDynamicPower(bool is_tdp) { return; } if (!init_stats) { - std::cerr << "[ MemManU ] Error: must set params before calling " + std::cerr << "[ MemManU ] Error: must set stats before calling " "computeDynamicPower()\n"; exit(1); } diff --git a/src/processor.cc b/src/processor.cc index 59c6b44..0517a04 100644 --- a/src/processor.cc +++ b/src/processor.cc @@ -115,6 +115,7 @@ void Processor::init(const ParseXML *XML, bool cp) { if (!cp) { cores[i].computeArea(); } + cores[i].set_stats(XML); cores[i].computeDynamicPower(); cores[i].computeDynamicPower(false); if (procdynp.homoCore) {