diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..aacac66
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,137 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveMacros: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+BinPackArguments: false
+BinPackParameters: false
+BraceWrapping: # NOTE(review): clang-format honors this map only when BreakBeforeBraces is Custom; it is Attach below, so these values are presumably ignored - confirm intent
+ AfterCaseLabel: false
+ AfterClass: true
+ AfterControlStatement: Always
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+# BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+# BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IncludeBlocks: Regroup
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ SortPriority: 0
+ - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+ Priority: 3
+ SortPriority: 0
+ - Regex: '.*'
+ Priority: 1
+ SortPriority: 0
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentCaseLabels: true
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+ReflowComments: true
+SortIncludes: true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard: Latest
+StatementMacros:
+ - Q_UNUSED
+ - QT_REQUIRE_VERSION
+TabWidth: 2
+UseCRLF: false
+UseTab: Never
+...
+
diff --git a/.gitignore b/.gitignore
index 2f66b66..0775c39 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,4 +29,18 @@
# McPAT custom
mcpat
-obj_opt/
\ No newline at end of file
+obj_opt/
+
+# vim
+.*.swo
+.*.swp
+
+# Unit-Test
+unit_test/output
+
+# CMake
+build*
+
+# Profiling
+gmon.out
+profile*.txt
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..fdd0723
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,2 @@
+[style]
+based_on_style = yapf
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..fd4f0c2
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Top-level CMake build script for McPAT (C++ only; needs Boost and Threads).
+cmake_minimum_required(VERSION 3.12)
+
+# Reject in-source builds so generated files never mix with the sources.
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
+  message(FATAL_ERROR "Create a separate build directory")
+endif()
+
+# Select Clang before project(): project() is what runs compiler detection.
+set(CMAKE_C_COMPILER clang)
+set(CMAKE_CXX_COMPILER clang++)
+project(mcpat DESCRIPTION "Power Timing Area Calculator"
+  LANGUAGES CXX)
+
+find_package(Boost 1.56 REQUIRED COMPONENTS
+  program_options
+  serialization)
+
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+set(MCPAT_VERSION_MAJOR 1)
+set(MCPAT_VERSION_MINOR 3)
+set(MCPAT_VERSION_PATCH 0)
+
+# Default to an optimized build when the user did not pick a build type.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Append the warning flags so flags supplied by the user/environment survive.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function")
+set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_PROFILE "-O3 -pg -g")
+add_definitions(-DNTHREADS=1)
+add_subdirectory(src)
diff --git a/README b/README
deleted file mode 100644
index d5473af..0000000
--- a/README
+++ /dev/null
@@ -1,228 +0,0 @@
- __ __ ____ _ _____
- | \/ | ___| _ \ / \|_ _|
- | |\/| |/ __| |_) / _ \ | |
- | | | | (__| __/ ___ \| |
- |_| |_|\___|_| /_/ \_\_|
-
-McPAT: (M)ulti(c)ore (P)ower, (A)rea, and (T)iming
-Current version 1.3
-==================================================
-
-* What McPAT is:
-
- --Architectural integrated power, area, and timing modeling framework, focuses on power and area modeling, with a target clock rate as a design constraint.
- -Consider power, area, and timing simultaneously
- -Complete power envelope
- -Power management techniques
- ---Manycore processor modeling framework
- -Different cores, uncore, and system (I/O) components
- -Holistically modeling across stacks: Technology models from ITRS projections (also supports user defined vdd), processor modeling based on modern processors
- ---Flexible, extensible, and high (i.e., architecture) level framework
- -A framework for architecture research
- -Flexible to make researchers's life easier
- Pre-populated micro-architecture configurations (can be changed by experienced users too!)
- Multilevel automatic optimization
- -Hierarchical modeling framework for easy extension and porting
- Standalone for TDP
- Paired up with performance simulators (or machine profiling statistics) for fine-grained study
-
-* What McPAT is NOT
-
- ---a hardware design EDA platform; nor a performance simulator
- -Use RTL/SPICE/...(not McPAT) if focusing on details of complex logic or analog components
- Empirical and curve-fitting based modeling for complex logic and analog building blocks (the most practical modeling methodology for high level framework).
- Solution1: Users replace those models with in-house models obtained from EDA tools
- Solution2: Users contribute their EDA based detailed models back to the community for sharing
- -Use performance simulators for performance (McPAT cannot do performance simulations)
- ---a restrictive environment
- -It is a framework (rather than a black-box tool)
- -Its built-in models are for references and for providing methodological examples.
- McPAT's built-in model includes simplified assumptions (e.g. unified instruction window for all instruction types)
- McPAT provides building blocks so that it is composable
- Users should always understand the methodology when using the built-in models or compose their own models.
- ---finished!
- -There is always room for improvement . . .
- -Thanks for the continueous contributing from the user community!
-
-====================
-For complete documentation of the McPAT, please refer to the following paper,
-"McPAT: An Integrated Power, Area, and Timing Modeling
- Framework for Multicore and Manycore Architectures",
-that appears in MICRO 2009. Please cite the paper, if you use
-McPAT in your work. The bibtex entry is provided below for your convenience.
-
- @inproceedings{mcpat:micro,
- author = {Sheng Li and Jung Ho Ahn and Richard D. Strong and Jay B. Brockman and Dean M. Tullsen and Norman P. Jouppi},
- title = "{McPAT: An Integrated Power, Area, and Timing Modeling Framework for Multicore and Manycore Architectures}",
- booktitle = {MICRO 42: Proceedings of the 42nd Annual IEEE/ACM International Symposium on Microarchitecture},
- year = {2009},
- pages = {469--480},
- }
-
-
-How to use the tool?
-====================
-
-McPAT takes input parameters from an XML-based interface,
-then it computes area and peak power of the
-Please note that the peak power is the absolute worst case power,
-which could be even higher than TDP.
-
-1. Steps to run McPAT:
- -> define the target processor using inorder.xml or OOO.xml
- -> run the "mcpat" binary:
- ./mcpat -infile <*.xml> -print_level < level of detailed output>
- ./mcpat -h (or mcpat --help) will show the quick help message.
-
-2. Optimization:
- McPAT will try its best to satisfy the target clock rate.
- When it cannot find a valid solution, it gives out warnings,
- while still giving a solution that is closest to the timing
- constraints and calculate power based on it. The optimization
- will lead to larger power/area numbers for target higher clock
- rate. McPAT also provides the option "-opt_for_clk" to turn on
- ("-opt_for_clk 1") and off this strict optimization for the
- timing constraint. When it is off, McPAT always optimize
- component for ED^2P without worrying about meeting the
- target clock frequency. By turning it off, the computation time
- can be reduced, which suites for situations where target clock rate
- is conservative.
-
-3. Outputs:
- McPAT outputs results in a hierarchical manner. Increasing
- the "-print_level" will show detailed results inside each
- component. For each component, major parts are shown, and associated
- pipeline registers/control logic are added up in total area/power of each
- components. In general, McPAT does not model the area/overhead of the pad
- frame used in a processor die.
-
-4. How to use the XML interface for McPAT
- 4.1 Set up the parameters
- Parameters of target designs need to be set in the *.xml file for
- entries tagged as "param". McPAT have very detailed parameter settings.
- please remove the structure parameter from the file if you want
- to use the default values. Otherwise, the parameters in the xml file
- will override the default values.
-
- 4.2 Pass the statistics
- There are two options to get the correct stats: a) the performance
- simulator can capture all the stats in detail and pass them to McPAT;
- b). Performance simulator can only capture partial stats and pass
- them to McPAT, while McPAT can reason about the complete stats using
- the partial information and the configuration. Therefore, there are
- some overlap for the stats.
-
- 4.3 Interface XML file structures (PLEASE READ!)
- The XML is hierarchical from processor level to micro-architecture
- level. McPAT support both heterogeneous and homogeneous manycore processors.
-
- 1). For heterogeneous processor setup, each component (core, NoC, cache,
- and etc) must have its own instantiations (core0, core1, ..., coreN).
- Each instantiation will have different parameters as well as its stats.
- Thus, the XML file must have multiple "instantiation" of each type of
- heterogeneous components and the corresponding hetero flags must be set
- in the XML file. Then state in the XML should be the stats of "a" instantiation
- (e.g. "a" cores). The reported runtime dynamic is of a single instantiation
- (e.g. "a" cores). Since the stats for each (e.g. "a" cores) may be different,
- we will see a whole list of (e.g. "a" cores) with different dynamic power,
- and total power is just a sum of them.
-
- 2). For homogeneous processors, the same method for heterogeneous can
- also be used by treating all homogeneous instantiations as heterogeneous.
- However, a preferred approach is to use a single representative for all
- the same components (e.g. core0 to represent all cores) and set the
- processor to have homogeneous components (e.g. ). Thus, the XML file only has one instantiation to represent
- all others with the same architectural parameters. The corresponding homo
- flags must be set in the XML file. Then, the stats in the XML should be
- the aggregated stats of the sum of all instantiations (e.g. aggregated stats
- of all cores). In the final results, McPAT will only report a single
- instantiation of each type of component, and the reported runtime dynamic power
- is the sum of all instantiations of the same type. This approach can run fast
- and use much less memory.
-
-5. Guide for integrating McPAT into performance simulators and bypassing the XML interface
- The detailed work flow of McPAT has two phases: the initialization phase and
- the computation phase. Specifically, in order to start the initialization phase a
- user specifies static configurations, including parameters at all three levels,
- namely, architectural, circuit, and technology levels. During the initialization
- phase, McPAT will generate the internal chip representation using the configurations
- set by the user.
- The computation phase of McPAT is called by McPAT or the performance simulator
- during simulation to generate runtime power numbers. Before calling McPAT to
- compute runtime power numbers, the performance simulator needs to pass the
- statistics, namely, the activity factors of each individual components to McPAT
- via the XML interface.
- The initialization phase is very time-consuming, since it will repeat many
- times until valid configurations are found or the possible configurations are
- exhausted. To reduce the overhead, a user can let the simulator to call McPAT
- directly for computation phase and only call initialization phase once at the
- beginning of simulation. In this case, the XML interface file is bypassed,
- please refer to processor.cc to see how the two phases are called.
-
-6. Sample input files:
- This package provide sample XML files for validating target processors. Please find the
- enclosed Niagara1.xml (for the Sun Niagara1 processor), Niagara2.xml (for the Sun Niagara2
- processor), Alpha21364.xml (for the Alpha21364 processor), Xeon.xml (for the Intel
- Xeon Tulsa processor), and ARM_A9_2GHz.xml (for ARM Cortex A9 hard core 2GHz implementation from
- ARM)
-
-7. Modeling of power management techniques:
- McPAT supports both DVS and power-gating. For DVS, users can use default ITRS projected vdd
- at each technology node as supply voltage at DVS level 0 (DVS0) or define voltage at DVS0.
- For power-gating, McPAT supports both default power-saving virtual supply voltage computed
- automatically using technology parameters. Default means using technology (ITRS based)
- lowest value for state-retaining power-gating User can also defined voltage for Power-saving states,
- as shown in example file of Xeon.xml (search for power_gating_vcc). When using user-defined power-saving
- virtual supply voltage, please understand the implications when setting up voltage for different sleep states.
- For example, when deep sleep state is used (voltage lower than the technology allowed state retaining supply voltage),
- the effects of losing data and cold start effects (beyond the scope of McPAT) must be considered when waking up the architecture.
- Power-gating and DVS cannot happen at the same time. Because power-gating happens when circuit is idle, while DVS happens when
- circuit blocks are active.
-
-
-====================
-McPAT includes its special version of Cacti (called Cacti-P) based on Cacti6.5 release. The major changes of
-the special Cacti, called Cacti-P in this distro, (compared to cacti6.5) include the following new features.
-The inclosed Cacti-P can run stand-alone if users want to use these features.
-
- * CAM and fully associative cache modeling
- * Improved leakage power modeling with consideration of device/gate topology
- * long channel device for reduce sub-threshold leakage power
- * Sleep transistor based power-gating modeling
- * gate leakage power
- * Support user defined voltage supply (Vdd)
- * Dynamic voltage scaling (DVS)
-
-For complete documentation of Cacti-P, please refer to the following paper,
-"CACTI-P: Architecture-Level Modeling for SRAM-based Structures with Advanced Leakage Reduction Techniques",
-that appeared in ICCAD2011. Please cite the paper, if you use
-Cacti-P in your work. The bibtex entry is provided below for your convenience.
-
-@inproceedings{cacti-p:iccad,
- author = {Sheng Li and Ke Chen and Jung Ho Ahn and Jay B. Brockman and Norman P. Jouppi},
- title = {CACTI-P: Architecture-level modeling for SRAM-based structures with advanced leakage reduction techniques},
- booktitle = {ICCAD: International Conference on Computer-Aided Design},
- year = {2011},
- pages = {694-701},
-}
-
-
-====================
-McPAT uses an opensource XML parser written and kindly specially licensed by Mr. Frank Vanden Berghen.
-The detailed information about this XML parser can be found at the license information in xmlParse.cc/xmlParse.h
-
-====================
-McPAT is in its beginning stage. We are still improving the tool.
-Please come back to its website for newer versions.
-McPAT has been constantly and rapidly improved with new models and latest technology.
-Please always refer to its code for most up-to-date and most accurate information.
-If you have any comments, questions, or suggestions, please write to us:
-
-
-Sheng Li
-Sheng.sli@gmail.com
-
-
-
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..81b0584
--- /dev/null
+++ b/README.md
@@ -0,0 +1,192 @@
+# McPAT
+
+McPAT: (**M**)ulti(**c**)ore (**P**)ower, (**A**)rea, and (**T**)iming
+Current version 1.4.0
+
+## About McPAT
+What McPAT is:
+- Architectural integrated power, area, and timing modeling framework, focuses on power and area modeling, with a target clock rate as a design constraint.
+ - Consider power, area, and timing simultaneously
+ - Complete power envelope
+ - Power management techniques
+- Manycore processor modeling framework
+ - Different cores, uncore, and system (I/O) components
+ - Holistically modeling across stacks: Technology models from ITRS projections (also supports user defined vdd), processor modeling based on modern processors
+- Flexible, extensible, and high (i.e., architecture) level framework
+ - A framework for architecture research
+ - Flexible to make researchers' lives easier
+ - Pre-populated micro-architecture configurations (can be changed by experienced users too!)
+ - Multilevel automatic optimization
+ - Hierarchical modeling framework for easy extension and porting
+ - Standalone for TDP
+ - Paired up with performance simulators (or machine profiling statistics) for fine-grained study
+
+What McPAT is NOT:
+- A hardware design EDA platform; nor a performance simulator
+ - Use RTL/SPICE/...(not McPAT) if focusing on details of complex logic or analog components
+ - Empirical and curve-fitting based modeling for complex logic and analog building blocks (the most practical modeling methodology for high level framework).
+ - Solution1: Users replace those models with in-house models obtained from EDA tools
+ - Solution2: Users contribute their EDA based detailed models back to the community for sharing
+ - Use performance simulators for performance (McPAT cannot do performance simulations)
+- A restrictive environment
+ - It is a framework (rather than a black-box tool)
+ - Its built-in models are for references and for providing methodological examples.
+ - McPAT's built-in model includes simplified assumptions (e.g. unified instruction window for all instruction types)
+ - McPAT provides building blocks so that it is composable
+ - Users should always understand the methodology when using the built-in models or compose their own models.
+- Finished!
+ - There is always room for improvement . . .
+ - Thanks for the continuous contributions from the user community!
+
+***
+
+For complete documentation of the McPAT, please refer to the following paper,
+*"McPAT: An Integrated Power, Area, and Timing Modeling
+ Framework for Multicore and Manycore Architectures"*,
+that appears in MICRO 2009. Please cite the paper, if you use
+McPAT in your work. The bibtex entry is provided below for your convenience.
+```
+@inproceedings{mcpat:micro,
+ author = {Sheng Li and Jung Ho Ahn and Richard D. Strong and Jay B. Brockman and Dean M. Tullsen and Norman P. Jouppi},
+ title = "{McPAT: An Integrated Power, Area, and Timing Modeling Framework for Multicore and Manycore Architectures}",
+ booktitle = {MICRO 42: Proceedings of the 42nd Annual IEEE/ACM International Symposium on Microarchitecture},
+ year = {2009},
+ pages = {469--480},
+}
+```
+
+## Usage
+
+McPAT takes input parameters from an XML-based interface,
+then it computes area and peak power of the target processor.
+Please note that the peak power is the absolute worst case power,
+which could be even higher than TDP.
+
+### Build:
+This fork of McPAT has 3 dependencies:
+1. [CMake 3.12+](https://cmake.org/)
+2. [LibBoost 1.56+](https://www.boost.org/)
+3. [Clang](https://clang.llvm.org/)
+
+To build McPAT:
+1. Create a build directory:
+ ```
+ mkdir build
+ ```
+2. Change directory to the build directory:
+ ```
+ cd build/
+ ```
+3. Use CMake to Generate the Unix Makefiles. There are 3 build types:
+ - Release - Enables -O3 optimizations
+ - Profile - Enables -O3 optimizations and compiles with flags for profiling with [gprof](https://ftp.gnu.org/old-gnu/Manuals/gprof-2.9.1/html_mono/gprof.html)
+ - Debug - Disables optimizations and compiles with debug flags for use with GDB
+ ```
+ cmake -DCMAKE_BUILD_TYPE=release ..
+ ```
+4. Make
+ ```
+ make clean && make -j
+ ```
+
+### Unit Testing:
+
+This version of McPAT also comes with a unit testing framework for verifying your builds. It tests all the included processor description files and outputs from v1.3.0 and has support for unit testing the restoration from a serialized file.
+
+To run the unit tests:
+```
+./unit_test.sh
+```
+This will run through all the test vectors and output the number of tests passed and test failed, along with providing a diff file for all the tests that failed. You can check the output directory for the diff file to see what was different from the expected output.
+
+
+### Run:
+
+To run McPAT outside of the unit testing framework, a configuration xml must be provided as an input.
+```
+./mcpat -i <path/to/config.xml>
+```
+To display the help message:
+```
+./mcpat --help
+```
+To create a serialization checkpoint:
+```
+./mcpat -i <path/to/config.xml> --serial_create=true --serial_file=<path/to/checkpoint>
+```
+To restore from a serialization checkpoint:
+```
+./mcpat -i <path/to/config.xml> --serial_restore=true --serial_file=<path/to/checkpoint>
+```
+
+## General Notes:
+
+1. Optimization: McPAT will try its best to satisfy the target clock rate. When it cannot find a valid solution, it gives out warnings, while still giving a solution that is closest to the timing constraints and calculates power based on it. The optimization will lead to larger power/area numbers for a higher target clock rate. McPAT also provides the option "-opt_for_clk" to turn on ("-opt_for_clk 1") and off this strict optimization for the timing constraint. When it is off, McPAT always optimizes components for ED^2P without worrying about meeting the target clock frequency. By turning it off, the computation time can be reduced, which suits situations where the target clock rate is conservative.
+2. Outputs: McPAT outputs results in a hierarchical manner. Increasing the "-print_level" will show detailed results inside each component. For each component, major parts are shown, and associated pipeline registers/control logic are added up in total area/power of each components. In general, McPAT does not model the area/overhead of the pad frame used in a processor die.
+
+3. How to use the XML interface for McPAT
+ 1. Set up the parameters: Parameters of target designs need to be set in the *.xml file for entries tagged as "param". McPAT has very detailed parameter settings. Please remove the structure parameter from the file if you want to use the default values. Otherwise, the parameters in the xml file will override the default values.
+ 2. Pass the statistics: There are two options to get the correct stats:
+ 1. The performance simulator can capture all the stats in detail and pass them to McPAT;
+ 2. Performance simulator can only capture partial stats and pass them to McPAT, while McPAT can reason about the complete stats using the partial information and the configuration. Therefore, there are some overlap for the stats.
+
+4. Interface XML file structures (**PLEASE READ!**) The XML is hierarchical from processor level to micro-architecture level. McPAT support both heterogeneous and homogeneous manycore processors.
+ 1. For heterogeneous processor setup, each component (core, NoC, cache, and etc) must have its own instantiations (core0, core1, ..., coreN). Each instantiation will have different parameters as well as its stats. Thus, the XML file must have multiple "instantiation" of each type of heterogeneous components and the corresponding hetero flags must be set in the XML file. Then state in the XML should be the stats of "a" instantiation (e.g. "a" cores). The reported runtime dynamic is of a single instantiation (e.g. "a" cores). Since the stats for each (e.g. "a" cores) may be different, we will see a whole list of (e.g. "a" cores) with different dynamic power, and total power is just a sum of them.
+ 2. For homogeneous processors, the same method for heterogeneous can also be used by treating all homogeneous instantiations as heterogeneous. However, a preferred approach is to use a single representative for all the same components (e.g. core0 to represent all cores) and set the processor to have homogeneous components (e.g. `<param name="homogeneous_cores" value="1"/>`). Thus, the XML file only has one instantiation to represent all others with the same architectural parameters. The corresponding homo flags must be set in the XML file. Then, the stats in the XML should be the aggregated stats of the sum of all instantiations (e.g. aggregated stats of all cores). In the final results, McPAT will only report a single instantiation of each type of component, and the reported runtime dynamic power is the sum of all instantiations of the same type. This approach can run fast and use much less memory.
+
+5. Guide for integrating McPAT into performance simulators and bypassing the XML interface: The detailed work flow of McPAT has two phases: the initialization phase and the computation phase. Specifically, in order to start the initialization phase a user specifies static configurations, including parameters at all three levels, namely, architectural, circuit, and technology levels. During the initialization phase, McPAT will generate the internal chip representation using the configurations set by the user. The computation phase of McPAT is called by McPAT or the performance simulator during simulation to generate runtime power numbers. Before calling McPAT to compute runtime power numbers, the performance simulator needs to pass the statistics, namely, the activity factors of each individual component, to McPAT via the XML interface. The initialization phase is very time-consuming, since it will repeat many times until valid configurations are found or the possible configurations are exhausted. To reduce the overhead, a user can let the simulator call McPAT directly for the computation phase and only call the initialization phase once at the beginning of simulation. In this case, the XML interface file is bypassed; please refer to processor.cc to see how the two phases are called.
+
+6. Sample input files: This package provides sample XML files for validating target processors. Please find the enclosed Niagara1.xml (for the Sun Niagara1 processor), Niagara2.xml (for the Sun Niagara2 processor), Alpha21364.xml (for the Alpha21364 processor), Xeon.xml (for the Intel Xeon Tulsa processor), and ARM_A9_2GHz.xml (for ARM Cortex A9 hard core 2GHz implementation from ARM).
+
+7. Modeling of power management techniques: McPAT supports both DVS and power-gating. For DVS, users can use the default ITRS projected vdd at each technology node as the supply voltage at DVS level 0 (DVS0) or define the voltage at DVS0. For power-gating, McPAT supports both a default power-saving virtual supply voltage, computed automatically using technology parameters, and a user-defined one. Default means using the lowest technology (ITRS based) value for state-retaining power-gating. Users can also define the voltage for power-saving states, as shown in the example file Xeon.xml (search for power_gating_vcc). When using a user-defined power-saving virtual supply voltage, please understand the implications when setting up the voltage for different sleep states. For example, when a deep sleep state is used (voltage lower than the technology allowed state retaining supply voltage), the effects of losing data and cold start effects (beyond the scope of McPAT) must be considered when waking up the architecture. Power-gating and DVS cannot happen at the same time, because power-gating happens when the circuit is idle, while DVS happens when circuit blocks are active.
+
+***
+
+McPAT includes its special version of Cacti (called Cacti-P) based on Cacti6.5 release. The major changes of
+the special Cacti, called Cacti-P in this distro, (compared to cacti6.5) include the following new features.
+The enclosed Cacti-P can run stand-alone if users want to use these features.
+
+ * CAM and fully associative cache modeling
+ * Improved leakage power modeling with consideration of device/gate topology
+ * Long channel devices for reduced sub-threshold leakage power
+ * Sleep transistor based power-gating modeling
+ * gate leakage power
+ * Support user defined voltage supply (Vdd)
+ * Dynamic voltage scaling (DVS)
+
+For complete documentation of Cacti-P, please refer to the following paper,
+*"CACTI-P: Architecture-Level Modeling for SRAM-based Structures with Advanced Leakage Reduction Techniques"*,
+that appeared in ICCAD2011. Please cite the paper, if you use
+Cacti-P in your work. The bibtex entry is provided below for your convenience.
+
+```
+@inproceedings{cacti-p:iccad,
+ author = {Sheng Li and Ke Chen and Jung Ho Ahn and Jay B. Brockman and Norman P. Jouppi},
+ title = {CACTI-P: Architecture-level modeling for SRAM-based structures with advanced leakage reduction techniques},
+ booktitle = {ICCAD: International Conference on Computer-Aided Design},
+ year = {2011},
+ pages = {694-701},
+}
+```
+
+
+***
+
+McPAT uses an opensource XML parser written and kindly specially licensed by Mr. Frank Vanden Berghen.
+The detailed information about this XML parser can be found at the license information in xmlParse.cc/xmlParse.h
+
+***
+
+McPAT is in its beginning stage. We are still improving the tool.
+Please come back to its website for newer versions.
+McPAT has been constantly and rapidly improved with new models and latest technology.
+Please always refer to its code for most up-to-date and most accurate information.
+If you have any comments, questions, or suggestions, please write to us:
+
+
+Sheng Li
+Sheng.sli@gmail.com
+
+
+
+
diff --git a/XML_Parse.cc b/XML_Parse.cc
deleted file mode 100644
index d6df351..0000000
--- a/XML_Parse.cc
+++ /dev/null
@@ -1,1853 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-#include
-#include "xmlParser.h"
-#include
-#include "XML_Parse.h"
-#include
-
-using namespace std;
-
-void ParseXML::parse(char* filepath)
-{
- unsigned int i,j,k,m,n;
- unsigned int NumofCom_4;
- unsigned int itmp;
- //Initialize all structures
- ParseXML::initialize();
-
- // this open and parse the XML file:
- XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer
-
- XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer
- //get all params in the second layer
- itmp=xNode2.nChildNode("param");
- for(i=0; iOrderofComponents_3layer)
- {
- //___________________________get all system.core0-n________________________________________________
- if (sys.homogeneous_cores==1) OrderofComponents_3layer=0;
- else OrderofComponents_3layer=sys.number_of_cores-1;
- for (i=0; i<=OrderofComponents_3layer; i++)
- {
- xNode3=xNode2.getChildNode("component",i);
- if (xNode3.isEmpty()==1) {
- printf("The value of homogeneous_cores or number_of_cores is not correct!");
- exit(0);
- }
- else{
- if (strstr(xNode3.getAttribute("name"),"core")!=NULL)
- {
- { //For cpu0-cpui
- //Get all params with system.core?
- itmp=xNode3.nChildNode("param");
- for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
-// xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
-// if (xNode3.isEmpty()==1) {
-// printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
-// exit(0);
-// }
-// if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL)
-// {
-//
-// itmp=xNode3.nChildNode("param");
-// for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
- xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
- if (xNode3.isEmpty()==1) {
- printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
- exit(0);
- }
- if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL)
- {
- itmp=xNode3.nChildNode("param");
- for(k=0; k
-#include "xmlParser.h"
-#include
-#include
-using namespace std;
-
-/*
-void myfree(char *t); // {free(t);}
-ToXMLStringTool tx,tx2;
-*/
-//all subnodes at the level of system.core(0-n)
-//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
-
-typedef struct{
- int prediction_width;
- char prediction_scheme[20];
- int predictor_size;
- int predictor_entries;
- int local_predictor_size[20];
- int local_predictor_entries;
- int global_predictor_entries;
- int global_predictor_bits;
- int chooser_predictor_entries;
- int chooser_predictor_bits;
- double predictor_accesses;
-} predictor_systemcore;
-typedef struct{
- int number_entries;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- double total_hits;
- double total_accesses;
- double total_misses;
- double conflicts;
-} itlb_systemcore;
-typedef struct{
- //params
- double icache_config[20];
- int buffer_sizes[20];
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- //stats
- double total_accesses;
- double read_accesses;
- double read_misses;
- double replacements;
- double read_hits;
- double total_hits;
- double total_misses;
- double miss_buffer_access;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double conflicts;
-} icache_systemcore;
-typedef struct{
- //params
- int number_entries;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double write_hits;
- double read_hits;
- double read_misses;
- double write_misses;
- double total_hits;
- double total_misses;
- double conflicts;
-} dtlb_systemcore;
-typedef struct{
- //params
- double dcache_config[20];
- int buffer_sizes[20];
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_access;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts;
-} dcache_systemcore;
-typedef struct{
- //params
- int BTB_config[20];
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
-} BTB_systemcore;
-typedef struct{
- //all params at the level of system.core(0-n)
- int clock_rate;
- bool opt_local;
- bool x86;
- int machine_bits;
- int virtual_address_width;
- int physical_address_width;
- int opcode_width;
- int micro_opcode_width;
- int instruction_length;
- int machine_type;
- int internal_datapath_width;
- int number_hardware_threads;
- int fetch_width;
- int number_instruction_fetch_ports;
- int decode_width;
- int issue_width;
- int peak_issue_width;
- int commit_width;
- int pipelines_per_core[20];
- int pipeline_depth[20];
- char FPU[20];
- char divider_multiplier[20];
- int ALU_per_core;
- double FPU_per_core;
- int MUL_per_core;
- int instruction_buffer_size;
- int decoded_stream_buffer_size;
- int instruction_window_scheme;
- int instruction_window_size;
- int fp_instruction_window_size;
- int ROB_size;
- int archi_Regs_IRF_size;
- int archi_Regs_FRF_size;
- int phy_Regs_IRF_size;
- int phy_Regs_FRF_size;
- int rename_scheme;
- int checkpoint_depth;
- int register_windows_size;
- char LSU_order[20];
- int store_buffer_size;
- int load_buffer_size;
- int memory_ports;
- char Dcache_dual_pump[20];
- int RAS_size;
- int fp_issue_width;
- int prediction_width;
- int number_of_BTB;
- int number_of_BPT;
-
- //all stats at the level of system.core(0-n)
- double total_instructions;
- double int_instructions;
- double fp_instructions;
- double branch_instructions;
- double branch_mispredictions;
- double committed_instructions;
- double committed_int_instructions;
- double committed_fp_instructions;
- double load_instructions;
- double store_instructions;
- double total_cycles;
- double idle_cycles;
- double busy_cycles;
- double instruction_buffer_reads;
- double instruction_buffer_write;
- double ROB_reads;
- double ROB_writes;
- double rename_accesses;
- double fp_rename_accesses;
- double rename_reads;
- double rename_writes;
- double fp_rename_reads;
- double fp_rename_writes;
- double inst_window_reads;
- double inst_window_writes;
- double inst_window_wakeup_accesses;
- double inst_window_selections;
- double fp_inst_window_reads;
- double fp_inst_window_writes;
- double fp_inst_window_wakeup_accesses;
- double fp_inst_window_selections;
- double archi_int_regfile_reads;
- double archi_float_regfile_reads;
- double phy_int_regfile_reads;
- double phy_float_regfile_reads;
- double phy_int_regfile_writes;
- double phy_float_regfile_writes;
- double archi_int_regfile_writes;
- double archi_float_regfile_writes;
- double int_regfile_reads;
- double float_regfile_reads;
- double int_regfile_writes;
- double float_regfile_writes;
- double windowed_reg_accesses;
- double windowed_reg_transports;
- double function_calls;
- double context_switches;
- double ialu_accesses;
- double fpu_accesses;
- double mul_accesses;
- double cdb_alu_accesses;
- double cdb_mul_accesses;
- double cdb_fpu_accesses;
- double load_buffer_reads;
- double load_buffer_writes;
- double load_buffer_cams;
- double store_buffer_reads;
- double store_buffer_writes;
- double store_buffer_cams;
- double store_buffer_forwards;
- double main_memory_access;
- double main_memory_read;
- double main_memory_write;
- double pipeline_duty_cycle;
-
- double IFU_duty_cycle ;
- double BR_duty_cycle ;
- double LSU_duty_cycle ;
- double MemManU_I_duty_cycle;
- double MemManU_D_duty_cycle ;
- double ALU_duty_cycle ;
- double MUL_duty_cycle ;
- double FPU_duty_cycle ;
- double ALU_cdb_duty_cycle ;
- double MUL_cdb_duty_cycle ;
- double FPU_cdb_duty_cycle ;
-
- double vdd;
- double power_gating_vcc;
-
- //all subnodes at the level of system.core(0-n)
- predictor_systemcore predictor;
- itlb_systemcore itlb;
- icache_systemcore icache;
- dtlb_systemcore dtlb;
- dcache_systemcore dcache;
- BTB_systemcore BTB;
-
-} system_core;
-typedef struct{
- //params
- int Directory_type;
- double Dir_config[20];
- int buffer_sizes[20];
- int clockrate;
- int ports[20];
- int device_type;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20];
- double vdd;
- double power_gating_vcc;
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double read_misses;
- double write_misses;
- double conflicts;
- double duty_cycle;
-} system_L1Directory;
-typedef struct{
- //params
- int Directory_type;
- double Dir_config[20];
- int buffer_sizes[20];
- int clockrate;
- int ports[20];
- int device_type;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20];
- double vdd;
- double power_gating_vcc;
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double read_misses;
- double write_misses;
- double conflicts;
- double duty_cycle;
-} system_L2Directory;
-typedef struct{
- //params
- double L2_config[20];
- int clockrate;
- int ports[20];
- int device_type;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20];
- int buffer_sizes[20];
- double vdd;
- double power_gating_vcc;
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_accesses;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts;
- double duty_cycle;
-
- bool merged_dir;
- double homenode_read_accesses;
- double homenode_write_accesses;
- double homenode_read_hits;
- double homenode_write_hits;
- double homenode_read_misses;
- double homenode_write_misses;
- double dir_duty_cycle;
-} system_L2;
-typedef struct{
- //params
- double L3_config[20];
- int clockrate;
- int ports[20];
- int device_type;
- int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
- char threeD_stack[20];
- int buffer_sizes[20];
- double vdd;
- double power_gating_vcc;
- //stats
- double total_accesses;
- double read_accesses;
- double write_accesses;
- double total_hits;
- double total_misses;
- double read_hits;
- double write_hits;
- double read_misses;
- double write_misses;
- double replacements;
- double write_backs;
- double miss_buffer_accesses;
- double fill_buffer_accesses;
- double prefetch_buffer_accesses;
- double prefetch_buffer_writes;
- double prefetch_buffer_reads;
- double prefetch_buffer_hits;
- double wbb_writes;
- double wbb_reads;
- double conflicts;
- double duty_cycle;
-
- bool merged_dir;
- double homenode_read_accesses;
- double homenode_write_accesses;
- double homenode_read_hits;
- double homenode_write_hits;
- double homenode_read_misses;
- double homenode_write_misses;
- double dir_duty_cycle;
-} system_L3;
-typedef struct{
- //params
- int number_of_inputs_of_crossbars;
- int number_of_outputs_of_crossbars;
- int flit_bits;
- int input_buffer_entries_per_port;
- int ports_of_input_buffer[20];
- //stats
- double crossbar_accesses;
-} xbar0_systemNoC;
-typedef struct{
- //params
- int clockrate;
- bool type;
- bool has_global_link;
- char topology[20];
- int horizontal_nodes;
- int vertical_nodes;
- int link_throughput;
- int link_latency;
- int input_ports;
- int output_ports;
- int virtual_channel_per_port;
- int flit_bits;
- int input_buffer_entries_per_vc;
- int ports_of_input_buffer[20];
- int dual_pump;
- int number_of_crossbars;
- char crossbar_type[20];
- char crosspoint_type[20];
- xbar0_systemNoC xbar0;
- int arbiter_type;
- double chip_coverage;
- double vdd;
- double power_gating_vcc;
- //stats
- double total_accesses;
- double duty_cycle;
- double route_over_perc;
-} system_NoC;
-typedef struct{
- //params
- int mem_tech_node;
- int device_clock;
- int peak_transfer_rate;
- int internal_prefetch_of_DRAM_chip;
- int capacity_per_channel;
- int number_ranks;
- int num_banks_of_DRAM_chip;
- int Block_width_of_DRAM_chip;
- int output_width_of_DRAM_chip;
- int page_size_of_DRAM_chip;
- int burstlength_of_DRAM_chip;
- //stats
- double memory_accesses;
- double memory_reads;
- double memory_writes;
-} system_mem;
-typedef struct{
- //params
- //Common Param for mc and fc
- double peak_transfer_rate;
- int number_mcs;
- bool withPHY;
- int type;
-
- //FCParam
- //stats
- double duty_cycle;
- double total_load_perc;
-
- //McParam
- int mc_clock;
- int llc_line_length;
- int memory_channels_per_mc;
- int number_ranks;
- int req_window_size_per_channel;
- int IO_buffer_size_per_channel;
- int databus_width;
- int addressbus_width;
- bool LVDS;
- double vdd;
- double power_gating_vcc;
-
- //stats
- double memory_accesses;
- double memory_reads;
- double memory_writes;
-} system_mc;
-
-typedef struct{
- //params
- int clockrate;
- int number_units;
- int type;
- double vdd;
- double power_gating_vcc;
- //stats
- double duty_cycle;
- double total_load_perc;
-} system_niu;
-
-typedef struct{
- //params
- int clockrate;
- int number_units;
- int num_channels;
- int type;
- bool withPHY;
- double vdd;
- double power_gating_vcc;
- //stats
- double duty_cycle;
- double total_load_perc;
-} system_pcie;
-
-typedef struct{
- //All number_of_* at the level of 'system' Ying 03/21/2009
- int number_of_cores;
- int number_of_L1Directories;
- int number_of_L2Directories;
- int number_of_L2s;
- bool Private_L2;
- int number_of_L3s;
- int number_of_NoCs;
- int number_of_dir_levels;
- int domain_size;
- int first_level_dir;
- // All params at the level of 'system'
- int homogeneous_cores;
- int homogeneous_L1Directories;
- int homogeneous_L2Directories;
- double core_tech_node;
- int target_core_clockrate;
- int target_chip_area;
- int temperature;
- int number_cache_levels;
- int L1_property;
- int L2_property;
- int homogeneous_L2s;
- int L3_property;
- int homogeneous_L3s;
- int homogeneous_NoCs;
- int homogeneous_ccs;
- int Max_area_deviation;
- int Max_power_deviation;
- int device_type;
- bool longer_channel_device;
- bool power_gating;
- bool Embedded;
- bool opt_dynamic_power;
- bool opt_lakage_power;
- bool opt_clockrate;
- bool opt_area;
- int interconnect_projection_type;
- int machine_bits;
- int virtual_address_width;
- int physical_address_width;
- int virtual_memory_page_size;
- double total_cycles;
- double vdd;
- double power_gating_vcc;
- //system.core(0-n):3rd level
- system_core core[64];
- system_L1Directory L1Directory[64];
- system_L2Directory L2Directory[64];
- system_L2 L2[64];
- system_L3 L3[64];
- system_NoC NoC[64];
- system_mem mem;
- system_mc mc;
- system_mc flashc;
- system_niu niu;
- system_pcie pcie;
-} root_system;
-
-class ParseXML
-{
-public:
- void parse(char* filepath);
- void initialize();
-public:
- root_system sys;
-};
-
-
-#endif /* XML_PARSE_H_ */
-
-
-
-
diff --git a/arch_const.h b/arch_const.h
deleted file mode 100644
index feffa6c..0000000
--- a/arch_const.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#ifndef ARCH_CONST_H_
-#define ARCH_CONST_H_
-
-typedef struct{
- unsigned int capacity;
- unsigned int assoc;//fully
- unsigned int blocksize;
-} array_inputs;
-
-//Do Not change, unless you want to bypass the XML interface and do not care about the default values.
-//Global parameters
-const int number_of_cores = 8;
-const int number_of_L2s = 1;
-const int number_of_L3s = 1;
-const int number_of_NoCs = 1;
-
-const double archi_F_sz_nm = 90.0;
-const unsigned int dev_type = 0;
-const double CLOCKRATE = 1.2*1e9;
-const double AF = 0.5;
-//const bool inorder = true;
-const bool embedded = false; //NEW
-
-const bool homogeneous_cores = true;
-const bool temperature = 360;
-const int number_cache_levels = 3;
-const int L1_property = 0; //private 0; coherent 1, shared 2.
-const int L2_property = 2;
-const bool homogeneous_L2s = true;
-const bool L3_property = 2;
-const bool homogeneous_L3s = true;
-const double Max_area_deviation = 50;
-const double Max_dynamic_deviation =50; //New
-const int opt_dynamic_power = 1;
-const int opt_lakage_power = 0;
-const int opt_area = 0;
-const int interconnect_projection_type = 0;
-
-//******************************Core Parameters
-#if (inorder)
-const int opcode_length = 8;//Niagara
-const int reg_length = 5;//Niagara
-const int instruction_length = 32;//Niagara
-const int data_width = 64;
-#else
-const int opcode_length = 8;//16;//Niagara
-const int reg_length = 7;//Niagara
-const int instruction_length = 32;//Niagara
-const int data_width = 64;
-#endif
-
-
-//Caches
-//itlb
-const int itlbsize=512;
-const int itlbassoc=0;//fully
-const int itlbblocksize=8;
-//icache
-const int icachesize=32768;
-const int icacheassoc=4;
-const int icacheblocksize=32;
-//dtlb
-const int dtlbsize=512;
-const int dtlbassoc=0;//fully
-const int dtlbblocksize=8;
-//dcache
-const int dcachesize=32768;
-const int dcacheassoc=4;
-const int dcacheblocksize=32;
-const int dcache_write_buffers=8;
-
-//cache controllers
-//IB,
-const int numIBEntries = 64;
-const int IBsize = 64;//2*4*instruction_length/8*2;
-const int IBassoc = 0;//In Niagara it is still fully associ
-const int IBblocksize = 4;
-
-//IFB and MIL should have the same parameters CAM
-const int IFBsize=128;//
-const int IFBassoc=0;//In Niagara it is still fully associ
-const int IFBblocksize=4;
-
-
-
-
-const int icache_write_buffers=8;
-
-//register file RAM
-const int regfilesize=5760;
-const int regfileassoc=1;
-const int regfileblocksize=18;
-//regwin RAM
-const int regwinsize=256;
-const int regwinassoc=1;
-const int regwinblocksize=8;
-
-
-
-//store buffer, lsq
-const int lsqsize=512;
-const int lsqassoc=0;
-const int lsqblocksize=8;
-
-//data fill queue RAM
-const int dfqsize=1024;
-const int dfqassoc=1;
-const int dfqblocksize=16;
-
-//outside the cores
-//L2 cache bank
-const int l2cachesize=262144;
-const int l2cacheassoc=16;
-const int l2cacheblocksize=64;
-
-//L2 directory
-const int l2dirsize=1024;
-const int l2dirassoc=0;
-const int l2dirblocksize=2;
-
-//crossbar
-//PCX
-const int PCX_NUMBER_INPUT_PORTS_CROSSBAR = 8;
-const int PCX_NUMBER_OUTPUT_PORTS_CROSSBAR = 9;
-const int PCX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =144;
-//PCX buffer RAM
-const int pcx_buffersize=1024;
-const int pcx_bufferassoc=1;
-const int pcx_bufferblocksize=32;
-const int pcx_numbuffer=5;
-//pcx arbiter
-const int pcx_arbsize=128;
-const int pcx_arbassoc=1;
-const int pcx_arbblocksize=2;
-const int pcx_numarb=5;
-
-//CPX
-const int CPX_NUMBER_INPUT_PORTS_CROSSBAR = 5;
-const int CPX_NUMBER_OUTPUT_PORTS_CROSSBAR = 8;
-const int CPX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =150;
-//CPX buffer RAM
-const int cpx_buffersize=1024;
-const int cpx_bufferassoc=1;
-const int cpx_bufferblocksize=32;
-const int cpx_numbuffer=8;
-//cpx arbiter
-const int cpx_arbsize=128;
-const int cpx_arbassoc=1;
-const int cpx_arbblocksize=2;
-const int cpx_numarb=8;
-
-
-
-
-
-const int numPhysFloatRegs=256;
-const int numPhysIntRegs=32;
-const int numROBEntries=192;
-const int umRobs=1;
-
-const int BTBEntries=4096;
-const int BTBTagSize=16;
-const int LFSTSize=1024;
-const int LQEntries=32;
-const int RASSize=16;
-const int SQEntries=32;
-const int SSITSize=1024;
-const int activity=0;
-const int backComSize=5;
-const int cachePorts=200;
-const int choiceCtrBits=2;
-const int choicePredictorSize=8192;
-
-
-const int commitWidth=8;
-const int decodeWidth=8;
-const int dispatchWidth=8;
-const int fetchWidth=8;
-const int issueWidth=1;
-const int renameWidth=8;
-//what is this forwardComSize=5??
-
-const int globalCtrBits=2;
-const int globalHistoryBits=13;
-const int globalPredictorSize=8192;
-
-
-
-const int localCtrBits=2;
-const int localHistoryBits=11;
-const int localHistoryTableSize=2048;
-const int localPredictorSize=2048;
-
-const double Woutdrvnandn =30 *0.09;//(24.0 * LSCALE)
-const double Woutdrvnandp =12.5 *0.09;//(10.0 * LSCALE)
-const double Woutdrvnorn =7.5*0.09;//(6.0 * LSCALE)
-const double Woutdrvnorp =50 * 0.09;// (40.0 * LSCALE)
-const double Woutdrivern =60*0.09;//(48.0 * LSCALE)
-const double Woutdriverp =100 * 0.09;//(80.0 * LSCALE)
-
-/*
-smtCommitPolicy=RoundRobin
-smtFetchPolicy=SingleThread
-smtIQPolicy=Partitioned
-smtIQThreshold=100
-smtLSQPolicy=Partitioned
-smtLSQThreshold=100
-smtNumFetchingThreads=1
-smtROBPolicy=Partitioned
-smtROBThreshold=100
-squashWidth=8
-*/
-
-/*
-prefetch_access=false
-prefetch_cache_check_push=true
-prefetch_data_accesses_only=false
-prefetch_degree=1
-prefetch_latency=10000
-prefetch_miss=false
-prefetch_past_page=false
-prefetch_policy=none
-prefetch_serial_squash=false
-prefetch_use_cpu_id=true
-prefetcher_size=100
-prioritizeRequests=false
-repl=Null
-
-
-split=false
-split_size=0
-subblock_size=0
-tgts_per_mshr=20
-trace_addr=0
-two_queue=false
-
-cpu_side=system.cpu0.dcache_port
-mem_side=system.tol2bus.port[2]
-*/
-
-//[system.cpu0.dtb]
-//type=AlphaDT
-
-
-#endif /* ARCH_CONST_H_ */
diff --git a/array.cc b/array.cc
deleted file mode 100644
index 85e617b..0000000
--- a/array.cc
+++ /dev/null
@@ -1,343 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#define GLOBALVAR
-#include "area.h"
-#include "decoder.h"
-#include "parameter.h"
-#include "array.h"
-#include
-#include
-#include
-#include "globalvar.h"
-
-using namespace std;
-
-ArrayST::ArrayST(const InputParameter *configure_interface,
- string _name,
- enum Device_ty device_ty_,
- bool opt_local_,
- enum Core_type core_ty_,
- bool _is_default)
-:l_ip(*configure_interface),
- name(_name),
- device_ty(device_ty_),
- opt_local(opt_local_),
- core_ty(core_ty_),
- is_default(_is_default)
- {
-
- if (l_ip.cache_sz<64) l_ip.cache_sz=64;
- if (l_ip.power_gating && (l_ip.assoc==0)) {l_ip.power_gating = false;}
- l_ip.error_checking();//not only do the error checking but also fill some missing parameters
- optimize_array();
-
-}
-
-
-void ArrayST::compute_base_power()
- {
- //l_ip.out_w =l_ip.line_sz*8;
- local_result=cacti_interface(&l_ip);
- assert(local_result.cycle_time>0);
- assert(local_result.access_time>0);
-// if (name == "Int FrontRAT")
-// {
-// cout< (candidate_iter)->power.readOp.dynamic)
- {
- min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
- min_dynamic_energy_iter = candidate_iter;
- local_result = *(min_dynamic_energy_iter);
- //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
-
- }
- else
- {
- candidate_iter->cleanup() ;
- }
-
- }
-
-
- }
- candidate_solutions.clear();
- }
-
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
- double pg_reduction = power_gating_leakage_reduction(false);//array structure all retain state;
-
- double macro_layout_overhead = g_tp.macro_layout_overhead;
- double chip_PR_overhead = g_tp.chip_layout_overhead;
- double total_overhead = macro_layout_overhead*chip_PR_overhead;
- local_result.area *= total_overhead;
-
- //maintain constant power density
- double pppm_t[4] = {total_overhead,1,1,total_overhead};
-
- double sckRation = g_tp.sckt_co_eff;
- local_result.power.readOp.dynamic *= sckRation;
- local_result.power.writeOp.dynamic *= sckRation;
- local_result.power.searchOp.dynamic *= sckRation;
- local_result.power.readOp.leakage *= l_ip.nbanks;
- local_result.power.readOp.longer_channel_leakage =
- local_result.power.readOp.leakage*long_channel_device_reduction;
-
- if (l_ip.assoc==0)//only use this function for CAM/FA since other array types compute pg leakage automatically
- {
- local_result.power.readOp.power_gated_leakage =
- local_result.power.readOp.leakage*pg_reduction;
- }
- else
- {
- local_result.power.readOp.power_gated_leakage *= l_ip.nbanks;//normal array types
- }
-
- local_result.power.readOp.power_gated_with_long_channel_leakage = local_result.power.readOp.power_gated_leakage * long_channel_device_reduction;//power-gating atop long channel
-
- local_result.power = local_result.power* pppm_t;
-
-
- local_result.data_array2->power.readOp.dynamic *= sckRation;
- local_result.data_array2->power.writeOp.dynamic *= sckRation;
- local_result.data_array2->power.searchOp.dynamic *= sckRation;
- local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.data_array2->power.readOp.longer_channel_leakage =
- local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
- if (l_ip.assoc==0)//only use this function for CAM/FA since other array types compute pg leakage automatically
- {
- local_result.data_array2->power.readOp.power_gated_leakage =
- local_result.data_array2->power.readOp.leakage*pg_reduction;
- }
- else
- {
- local_result.data_array2->power.readOp.power_gated_leakage *= l_ip.nbanks;//normal array types
- }
- local_result.data_array2->power.readOp.power_gated_with_long_channel_leakage = local_result.data_array2->power.readOp.power_gated_leakage * long_channel_device_reduction;
-
- local_result.data_array2->power = local_result.data_array2->power* pppm_t;
-
-
- if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
- {
- local_result.tag_array2->power.readOp.dynamic *= sckRation;
- local_result.tag_array2->power.writeOp.dynamic *= sckRation;
- local_result.tag_array2->power.searchOp.dynamic *= sckRation;
- local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.tag_array2->power.readOp.power_gated_leakage *= l_ip.nbanks;
- local_result.tag_array2->power.readOp.longer_channel_leakage =
- local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
-
- local_result.tag_array2->power.readOp.power_gated_with_long_channel_leakage =
- local_result.tag_array2->power.readOp.power_gated_leakage*long_channel_device_reduction;
- local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
- }
-
-
-}
-
-void ArrayST::leakage_feedback(double temperature)//TODO: add the code to process power-gating leakage
-{
- // Update the temperature. l_ip is already set and error-checked in the creator function.
- l_ip.temp = (unsigned int)round(temperature/10.0)*10;
-
- // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
- reconfigure(&l_ip,&local_result);
-
- // Scale the power values. This is part of ArrayST::optimize_array().
- double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
-
- double macro_layout_overhead = g_tp.macro_layout_overhead;
- double chip_PR_overhead = g_tp.chip_layout_overhead;
- double total_overhead = macro_layout_overhead*chip_PR_overhead;
-
- double pppm_t[4] = {total_overhead,1,1,total_overhead};
-
- double sckRation = g_tp.sckt_co_eff;
- local_result.power.readOp.dynamic *= sckRation;
- local_result.power.writeOp.dynamic *= sckRation;
- local_result.power.searchOp.dynamic *= sckRation;
- local_result.power.readOp.leakage *= l_ip.nbanks;
- local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction;
- local_result.power = local_result.power* pppm_t;
-
- local_result.data_array2->power.readOp.dynamic *= sckRation;
- local_result.data_array2->power.writeOp.dynamic *= sckRation;
- local_result.data_array2->power.searchOp.dynamic *= sckRation;
- local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
- local_result.data_array2->power = local_result.data_array2->power* pppm_t;
-
- if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
- {
- local_result.tag_array2->power.readOp.dynamic *= sckRation;
- local_result.tag_array2->power.writeOp.dynamic *= sckRation;
- local_result.tag_array2->power.searchOp.dynamic *= sckRation;
- local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
- local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
- local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
- }
-}
-
-ArrayST:: ~ArrayST()
-{
- local_result.cleanup();
-}
diff --git a/basic_components.cc b/basic_components.cc
deleted file mode 100644
index 371cf23..0000000
--- a/basic_components.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "basic_components.h"
-#include
-#include
-#include
-
-double longer_channel_device_reduction(
- enum Device_ty device_ty,
- enum Core_type core_ty)
-{
-
- double longer_channel_device_percentage_core;
- double longer_channel_device_percentage_uncore;
- double longer_channel_device_percentage_llc;
-
- double long_channel_device_reduction;
-
- longer_channel_device_percentage_llc = 1.0;
- longer_channel_device_percentage_uncore = 0.82;
- if (core_ty==OOO)
- {
- longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
- //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
-
- }
- else
- {
- longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
- //longer_channel_device_percentage_uncore = 0.9;//Niagara
- }
-
- if (device_ty==Core_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_core)
- + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
- }
- else if (device_ty==Uncore_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
- + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
- }
- else if (device_ty==LLC_device)
- {
- long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
- + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
- }
- else
- {
- cout<<"unknown device category"<
-
-const double cdb_overhead = 1.1;
-
-enum FU_type {
- FPU,
- ALU,
- MUL
-};
-
-enum Renaming_type {
- RAMbased,
- CAMbased
-};
-
-enum Scheduler_type {
- PhysicalRegFile,
- ReservationStation
-};
-
-enum cache_level {
- L2,
- L3,
- L1Directory,
- L2Directory
-};
-
-enum MemoryCtrl_type {
- MC, //memory controller
- FLASHC //flash controller
-};
-
-enum Dir_type {
- ST,//shadowed tag
- DC,//directory cache
- SBT,//static bank tag
- NonDir
-
-};
-
-enum Cache_policy {
- Write_through,
- Write_back
-};
-
-enum Device_ty {
- Core_device,
- Uncore_device,
- LLC_device
-};
-
-enum Core_type {
- OOO,
- Inorder
-};
-
-class statsComponents
-{
- public:
- double access;
- double hit;
- double miss;
-
- statsComponents() : access(0), hit(0), miss(0) {}
- statsComponents(const statsComponents & obj) { *this = obj; }
- statsComponents & operator=(const statsComponents & rhs)
- {
- access = rhs.access;
- hit = rhs.hit;
- miss = rhs.miss;
- return *this;
- }
- void reset() { access = 0; hit = 0; miss = 0;}
-
- friend statsComponents operator+(const statsComponents & x, const statsComponents & y);
- friend statsComponents operator*(const statsComponents & x, double const * const y);
-};
-
-class statsDef
-{
- public:
- statsComponents readAc;
- statsComponents writeAc;
- statsComponents searchAc;
-
- statsDef() : readAc(), writeAc(),searchAc() { }
- void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
-
- friend statsDef operator+(const statsDef & x, const statsDef & y);
- friend statsDef operator*(const statsDef & x, double const * const y);
-};
-
-double longer_channel_device_reduction(
- enum Device_ty device_ty=Core_device,
- enum Core_type core_ty=Inorder);
-
-double power_gating_leakage_reduction(
- bool retain_state=false);
-
-class CoreDynParam {
-public:
- CoreDynParam(){};
- CoreDynParam(ParseXML *XML_interface, int ithCore_);
- // :XML(XML_interface),
- // ithCore(ithCore_)
- // core_ty(inorder),
- // rm_ty(CAMbased),
- // scheu_ty(PhysicalRegFile),
- // clockRate(1e9),//1GHz
- // arch_ireg_width(32),
- // arch_freg_width(32),
- // phy_ireg_width(128),
- // phy_freg_width(128),
- // perThreadState(8),
- // globalCheckpoint(32),
- // instructionLength(32){};
- //ParseXML * XML;
- bool opt_local;
- bool x86;
- bool Embedded;
- enum Core_type core_ty;
- enum Renaming_type rm_ty;
- enum Scheduler_type scheu_ty;
- double clockRate,executionTime;
- int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width, hthread_width;
- int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries;
- int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW;
- int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length;
- int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines;
- int num_alus, num_muls;
- double num_fpus;
- int int_data_width, fp_data_width,v_address_width, p_address_width;
- double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles;
- bool regWindowing,multithreaded;
- double pppm_lkg_multhread[4];
- double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle,
- MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle,
- FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle,
- FPU_cdb_duty_cycle;
- double vdd;
- double power_gating_vcc;
- ~CoreDynParam(){};
-};
-
-class CacheDynParam {
-public:
- CacheDynParam(){};
- CacheDynParam(ParseXML *XML_interface, int ithCache_);
- string name;
- enum Dir_type dir_ty;
- double clockRate,executionTime;
- double capacity, blockW, assoc, nbanks;
- double throughput, latency;
- double duty_cycle, dir_duty_cycle;
- //double duty_cycle;
- int missb_size, fu_size, prefetchb_size, wbb_size;
- double vdd;
- double power_gating_vcc;
- ~CacheDynParam(){};
-};
-
-class MCParam {
-public:
- MCParam(){};
- MCParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate,num_mcs, peakDataTransferRate, num_channels;
- // double mcTEPowerperGhz;
- // double mcPHYperGbit;
- // double area;
- int llcBlockSize, dataBusWidth, addressBusWidth;
- int opcodeW;
- int memAccesses;
- int memRank;
- int type;
- double frontend_duty_cycle, duty_cycle, perc_load;
- double executionTime, reads, writes;
- bool LVDS, withPHY;
- double vdd;
- double power_gating_vcc;
- ~MCParam(){};
-};
-
-class NoCParam {
-public:
- NoCParam(){};
- NoCParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int flit_size;
- int input_ports, output_ports, min_ports, global_linked_ports;
- int virtual_channel_per_port,input_buffer_entries_per_vc;
- int horizontal_nodes,vertical_nodes, total_nodes;
- double executionTime, total_access, link_throughput,link_latency,
- duty_cycle, chip_coverage, route_over_perc;
- bool has_global_link, type;
- double vdd;
- double power_gating_vcc;
- ~NoCParam(){};
-};
-
-class ProcParam {
-public:
- ProcParam(){};
- ProcParam(ParseXML *XML_interface, int ithCache_);
- string name;
- int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
- bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
- double vdd;
- double power_gating_vcc;
- ~ProcParam(){};
-};
-
-class NIUParam {
-public:
- NIUParam(){};
- NIUParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int num_units;
- int type;
- double duty_cycle, perc_load;
- double vdd;
- double power_gating_vcc;
- ~NIUParam(){};
-};
-
-class PCIeParam {
-public:
- PCIeParam(){};
- PCIeParam(ParseXML *XML_interface, int ithCache_);
- string name;
- double clockRate;
- int num_channels, num_units;
- bool withPHY;
- int type;
- double duty_cycle, perc_load;
- double vdd;
- double power_gating_vcc;
- ~PCIeParam(){};
-};
-#endif /* BASIC_COMPONENTS_H_ */
diff --git a/cacti/Ucache.cc b/cacti/Ucache.cc
deleted file mode 100644
index 946ed9d..0000000
--- a/cacti/Ucache.cc
+++ /dev/null
@@ -1,1122 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-#include
-#include
-
-
-#include "area.h"
-#include "bank.h"
-#include "basic_circuit.h"
-#include "component.h"
-#include "const.h"
-#include "decoder.h"
-#include "parameter.h"
-#include "Ucache.h"
-#include "subarray.h"
-#include "uca.h"
-
-#include
-#include
-#include
-#include
-
-using namespace std;
-
-const uint32_t nthreads = NTHREADS;
-
-
-void min_values_t::update_min_values(const min_values_t * val)
-{
- min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
- min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
- min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
- min_area = (min_area > val->min_area) ? val->min_area : min_area;
- min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
-}
-
-
-
-void min_values_t::update_min_values(const uca_org_t & res)
-{
- min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
- min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
- min_area = (min_area > res.area) ? res.area : min_area;
- min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
-}
-
-void min_values_t::update_min_values(const nuca_org_t * res)
-{
- min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
- min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
- min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
- min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
-}
-
-void min_values_t::update_min_values(const mem_array * res)
-{
- min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
- min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
- min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
- min_area = (min_area > res->area) ? res->area : min_area;
- min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
-}
-
-
-
-void * calc_time_mt_wrapper(void * void_obj)
-{
- calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
- uint32_t tid = calc_obj->tid;
- list & data_arr = calc_obj->data_arr;
- list & tag_arr = calc_obj->tag_arr;
- bool is_tag = calc_obj->is_tag;
- bool pure_ram = calc_obj->pure_ram;
- bool pure_cam = calc_obj->pure_cam;
- bool is_main_mem = calc_obj->is_main_mem;
- double Nspd_min = calc_obj->Nspd_min;
- min_values_t * data_res = calc_obj->data_res;
- min_values_t * tag_res = calc_obj->tag_res;
-
- data_arr.clear();
- data_arr.push_back(new mem_array);
- tag_arr.clear();
- tag_arr.push_back(new mem_array);
-
- uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
- uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
- uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
-
-
- bool is_valid_partition;
- int wt_min, wt_max;
-
- if (g_ip->force_wiretype) {
- if (g_ip->wt == 0) {
- wt_min = Low_swing;
- wt_max = Low_swing;
- }
- else {
- wt_min = Global;
- wt_max = Low_swing-1;
- }
- }
- else {
- wt_min = Global;
- wt_max = Low_swing;
- }
-
- for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
- {
- for (int wr = wt_min; wr <= wt_max; wr++)
- {
- for (uint32_t iter = tid; iter < niter; iter += nthreads)
- {
- // reconstruct Ndwl, Ndbl, Ndcm
- unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
- unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
- unsigned int Ndcm = 1 << (iter % Ndcm_niter);
- for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
- {
- for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
- {
- //for debuging
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = g_ip->wt;
- Ndwl = g_ip->ndwl;
- Ndbl = g_ip->ndbl;
- Ndcm = g_ip->ndcm;
- if(g_ip->nspd != 0) {
- Nspd = g_ip->nspd;
- }
- if(g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = g_ip->ndsam1;
- Ndsam_lev_2 = g_ip->ndsam2;
- }
- }
-
- if (is_tag == true)
- {
- is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- tag_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
- // If it's a fully-associative cache, the data array partition parameters are identical to that of
- // the tag array, so compute data array partition properties also here.
- if (is_tag == false || g_ip->fully_assoc)
- {
- is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
- Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
- data_arr.back(), 0, NULL, NULL,
- is_main_mem);
- }
-
- if (is_valid_partition)
- {
- if (is_tag == true)
- {
- tag_arr.back()->wt = (enum Wire_type) wr;
- tag_res->update_min_values(tag_arr.back());
- tag_arr.push_back(new mem_array);
- }
- if (is_tag == false || g_ip->fully_assoc)
- {
- data_arr.back()->wt = (enum Wire_type) wr;
- data_res->update_min_values(data_arr.back());
- data_arr.push_back(new mem_array);
- }
- }
-
- if (g_ip->force_cache_config && is_tag == false)
- {
- wr = wt_max;
- iter = niter;
- if(g_ip->nspd != 0) {
- Nspd = MAXDATASPD;
- }
- if (g_ip->ndsam1 != 0) {
- Ndsam_lev_1 = MAX_COL_MUX+1;
- Ndsam_lev_2 = MAX_COL_MUX+1;
- }
- }
- }
- }
- }
- }
- }
-
- delete data_arr.back();
- delete tag_arr.back();
- data_arr.pop_back();
- tag_arr.pop_back();
-
- pthread_exit(NULL);
-}
-
-
-
-bool calculate_time(
- bool is_tag,
- int pure_ram,
- bool pure_cam,
- double Nspd,
- unsigned int Ndwl,
- unsigned int Ndbl,
- unsigned int Ndcm,
- unsigned int Ndsam_lev_1,
- unsigned int Ndsam_lev_2,
- mem_array *ptr_array,
- int flag_results_populate,
- results_mem_array *ptr_results,
- uca_org_t *ptr_fin_res,
- bool is_main_mem)
-{
- DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
-
- if (dyn_p.is_valid == false)
- {
- return false;
- }
-
- UCA * uca = new UCA(dyn_p);
-
-
- if (flag_results_populate)
- { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
- }
- else
- {
-
- collect_uca_results(Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, uca, ptr_array, is_main_mem);
- }
-
- delete uca;
- return true;
-}
-
-void collect_uca_results(
-// bool is_tag,
-// int pure_ram,
-// bool pure_cam,
- double Nspd,
- unsigned int Ndwl,
- unsigned int Ndbl,
- unsigned int Ndcm,
- unsigned int Ndsam_lev_1,
- unsigned int Ndsam_lev_2,
- UCA const * const uca,
- mem_array * const ptr_array,
-// int flag_results_populate,
-// results_mem_array *ptr_results,
-// uca_org_t *ptr_fin_res,
- bool is_main_mem)
-{
- int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
- int num_mats = uca->bank.dp.num_mats;
- bool is_fa = uca->bank.dp.fully_assoc;
- bool pure_cam = uca->bank.dp.pure_cam;
- ptr_array->Ndwl = Ndwl;
- ptr_array->Ndbl = Ndbl;
- ptr_array->Nspd = Nspd;
- ptr_array->deg_bl_muxing = uca->bank.dp.deg_bl_muxing;
- ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
- ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
- ptr_array->access_time = uca->access_time;
- ptr_array->cycle_time = uca->cycle_time;
- ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
- ptr_array->area_ram_cells = uca->area_all_dataramcells;
- ptr_array->area = uca->area.get_area();
- ptr_array->height = uca->area.h;
- ptr_array->width = uca->area.w;
- ptr_array->mat_height = uca->bank.mat.area.h;
- ptr_array->mat_length = uca->bank.mat.area.w;
- ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
- ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
- ptr_array->power = uca->power;
- ptr_array->delay_senseamp_mux_decoder =
- MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
- uca->delay_array_to_sa_mux_lev_2_decoder);
- ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
- ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
-
- ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
- ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
- ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
- ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
- ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
- ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
- ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
- ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
- ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
- ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
-
- ptr_array->all_banks_height = uca->area.h;
- ptr_array->all_banks_width = uca->area.w;
- ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
-
- ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
- ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
- ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
-// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power;
-// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
- ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
- ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
- ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
- ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
- ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
- ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
- ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
- ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_bitlines = uca->bank.mat.power_bitline;
- ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_sense_amps = uca->bank.mat.power_sa;
- ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
- ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
- ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
-
- ptr_array->power_comparators = uca->bank.mat.power_comparator;
- ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
- ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
-
-// cout << " num of mats: " << dyn_p.num_mats << endl;
- if (is_fa || pure_cam)
- {
- ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
-// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power;
-// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline;
-// cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats;
- ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
- ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchlines = uca->bank.mat.power_matchline;
- ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
- ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
- ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
-// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy;
- ptr_array->read_energy = uca->read_energy;
- ptr_array->write_energy = uca->write_energy;
- ptr_array->precharge_energy = uca->precharge_energy;
- ptr_array->refresh_power = uca->refresh_power;
- ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
- ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
- ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
-
- ptr_array->precharge_delay = uca->precharge_delay;
-
-
-// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<array_leakage= uca->bank.array_leakage;
- ptr_array->wl_leakage= uca->bank.wl_leakage;
- ptr_array->cl_leakage= uca->bank.cl_leakage;
- if (g_ip->power_gating)
- {
- ptr_array->sram_sleep_tx_width= uca->bank.mat.sram_sleep_tx->width;
- ptr_array->sram_sleep_tx_area= uca->bank.mat.array_sleep_tx_area;
- ptr_array->sram_sleep_wakeup_latency= uca->bank.mat.array_wakeup_t;
- ptr_array->sram_sleep_wakeup_energy= uca->bank.mat.array_wakeup_e.readOp.dynamic;
-
- ptr_array->wl_sleep_tx_width= uca->bank.mat.row_dec->sleeptx->width;
- ptr_array->wl_sleep_tx_area= uca->bank.mat.wl_sleep_tx_area;
- ptr_array->wl_sleep_wakeup_latency= uca->bank.mat.wl_wakeup_t;
- ptr_array->wl_sleep_wakeup_energy= uca->bank.mat.wl_wakeup_e.readOp.dynamic;
-
- ptr_array->bl_floating_wakeup_latency= uca->bank.mat.blfloating_wakeup_t;
- ptr_array->bl_floating_wakeup_energy= uca->bank.mat.blfloating_wakeup_e.readOp.dynamic;
-
-
- }
-
-
- ptr_array->num_active_mats = uca->bank.dp.num_act_mats_hor_dir;
- ptr_array->num_submarray_mats = uca->bank.mat.num_subarrays_per_mat;
- // cout<<"array_leakage"<array_leakage<wl_leakage<cl_leakage<long_channel_leakage_reduction_periperal = uca->long_channel_leakage_reduction_periperal;
- ptr_array->long_channel_leakage_reduction_memcell = uca->long_channel_leakage_reduction_memcell;
-
-}
-
-
-bool check_uca_org(uca_org_t & u, min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
-}
-
-bool check_mem_org(mem_array & u, const min_values_t *minval)
-{
- if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
- return false;
- }
- if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
- g_ip->dynamic_power_dev) {
- return false;
- }
- if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
- g_ip->leakage_power_dev) {
- return false;
- }
- if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
- g_ip->cycle_time_dev) {
- return false;
- }
- if (((u.area - minval->min_area)/minval->min_area)*100 >
- g_ip->area_dev) {
- return false;
- }
- return true;
-}
-
-
-
-
-void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist)
-{
- double cost = 0;
- double min_cost = BIGNUM;
- float d, a, dp, lp, c;
-
- dp = g_ip->dynamic_power_wt;
- lp = g_ip->leakage_power_wt;
- a = g_ip->area_wt;
- d = g_ip->delay_wt;
- c = g_ip->cycle_time_wt;
-
- if (ulist.empty() == true)
- {
- cout << "ERROR: no valid cache organizations found" << endl;
- exit(0);
- }
-
- for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
- {
- if (g_ip->ed == 1)
- {
- cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
- }
- else if (g_ip->ed == 2)
- {
- cost = ((niter)->access_time/minval->min_delay)*
- ((niter)->access_time/minval->min_delay)*
- ((niter)->power.readOp.dynamic/minval->min_dyn);
- if (min_cost > cost)
- {
- min_cost = cost;
- *res = (*(niter));
- }
- }
- else
- {
- /*
- * check whether the current organization
- * meets the input deviation constraints
- */
- bool v = check_uca_org(*niter, minval);
- //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
-
- if (v)
- {
- cost = (d * ((niter)->access_time/minval->min_delay) +
- c * ((niter)->cycle_time/minval->min_cyc) +
- dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
- lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
- a * ((niter)->area/minval->min_area));
- //fprintf(stderr, "cost = %g\n", cost);
-
- if (min_cost > cost) {
- min_cost = cost;
- *res = (*(niter));
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
- }
- }
- else {
- niter = ulist.erase(niter);
- if (niter!=ulist.begin())
- niter--;
- }
- }
- }
-
- if (min_cost == BIGNUM)
- {
- cout << "ERROR: no cache organizations met optimization criteria" << endl;
- exit(0);
- }
-}
-
-
-
-void filter_tag_arr(const min_values_t * min, list & list)
-{
- double cost = BIGNUM;
- double cur_cost;
- double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
- mem_array * res = NULL;
-
- if (list.empty() == true)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(1);
- }
-
-
- while (list.empty() != true)
- {
- bool v = check_mem_org(*list.back(), min);
- if (v)
- {
- cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
- wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
- wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
- wt_area * (list.back()->area/min->min_area) +
- wt_cyc * (list.back()->cycle_time/min->min_cyc);
- }
- else
- {
- cur_cost = BIGNUM;
- }
- if (cur_cost < cost)
- {
- if (res != NULL)
- {
- delete res;
- }
- cost = cur_cost;
- res = list.back();
- }
- else
- {
- delete list.back();
- }
- list.pop_back();
- }
- if(!res)
- {
- cout << "ERROR: no valid tag organizations found" << endl;
- exit(0);
- }
-
- list.push_back(res);
-}
-
-
-
-void filter_data_arr(list & curr_list)
-{
- if (curr_list.empty() == true)
- {
- cout << "ERROR: no valid data array organizations found" << endl;
- exit(1);
- }
-
- list::iterator iter;
-
- for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
- {
- mem_array * m = *iter;
-
- if (m == NULL) exit(1);
-
- if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
- ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
- {
- delete m;
- iter = curr_list.erase(iter);
- iter --;
- }
- }
-}
-
-
-
-/*
- * Performs exhaustive search across different sub-array sizes,
- * wire types and aspect ratios to find an optimal UCA organization
- * 1. First different valid tag array organizations are calculated
- * and stored in tag_arr array
- * 2. The exhaustive search is repeated to find valid data array
- * organizations and stored in data_arr array
- * 3. Cache area, delay, power, and cycle time for different
- * cache organizations are calculated based on the
- * above results
- * 4. Cache model with least cost is picked from sol_list
- */
-void solve(uca_org_t *fin_res)
-{
- bool is_dram = false;
- int pure_ram = g_ip->pure_ram;
- bool pure_cam = g_ip->pure_cam;
-
- init_tech_params(g_ip->F_sz_um, false);
-
-
- list tag_arr (0);
- list data_arr(0);
- list::iterator miter;
- list sol_list(1, uca_org_t());
-
- fin_res->tag_array.access_time = 0;
- fin_res->tag_array.Ndwl = 0;
- fin_res->tag_array.Ndbl = 0;
- fin_res->tag_array.Nspd = 0;
- fin_res->tag_array.deg_bl_muxing = 0;
- fin_res->tag_array.Ndsam_lev_1 = 0;
- fin_res->tag_array.Ndsam_lev_2 = 0;
-
-
- // distribute calculate_time() execution to multiple threads
- calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
- pthread_t threads[nthreads];
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].tid = t;
- calc_array[t].pure_ram = pure_ram;
- calc_array[t].pure_cam = pure_cam;
- calc_array[t].data_res = new min_values_t();
- calc_array[t].tag_res = new min_values_t();
- }
-
- bool is_tag;
- uint32_t ram_cell_tech_type;
-
- // If it's a cache, first calculate the area, delay and power for all tag array partitions.
- if (!(pure_ram||pure_cam||g_ip->fully_assoc))
- { //cache
- is_tag = true;
- ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
- init_tech_params(g_ip->F_sz_um, is_tag);
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = false;
- calc_array[t].Nspd_min = 0.125;
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
- }
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
- }
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- calc_array[t].tag_arr.sort(mem_array::lt);
- tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
- }
- }
-
-
- // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
-// if (!g_ip->fully_assoc)
-// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
- is_tag = false;
- ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
- init_tech_params(g_ip->F_sz_um, is_tag);
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].is_tag = is_tag;
- calc_array[t].is_main_mem = g_ip->is_main_mem;
- if (!(pure_cam||g_ip->fully_assoc))
- {
- calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
- }
- else
- {
- calc_array[t].Nspd_min = 1;
- }
-
- pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
- }
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- pthread_join(threads[t], NULL);
- }
-
- data_arr.clear();
- for (uint32_t t = 0; t < nthreads; t++)
- {
- calc_array[t].data_arr.sort(mem_array::lt);
- data_arr.merge(calc_array[t].data_arr, mem_array::lt);
- }
-// }
-
-
- min_values_t * d_min = new min_values_t();
- min_values_t * t_min = new min_values_t();
- min_values_t * cache_min = new min_values_t();
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- d_min->update_min_values(calc_array[t].data_res);
- t_min->update_min_values(calc_array[t].tag_res);
- }
-
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- (*miter)->arr_min = d_min;
- }
-
-
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
- filter_data_arr(data_arr);
- if(!(pure_ram||pure_cam||g_ip->fully_assoc))
- {
- filter_tag_arr(t_min, tag_arr);
- }
- //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
-
-
- if (pure_ram||pure_cam||g_ip->fully_assoc)
- {
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back(); //essentially adds value to sol_list, with no extra memory copying.
- curr_org.tag_array2 = NULL;
- curr_org.data_array2 = (*miter);
-
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
-
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
-
- sol_list.push_back(uca_org_t());//add a new node to the back
- }
- }
- else
- {
- while (tag_arr.empty() != true)
- {
- mem_array * arr_temp = (tag_arr.back());
- //delete tag_arr.back();
- tag_arr.pop_back();//this causes double free problem if uca_org_t has a destructor to release all contained pointers---when called by sol_list.clear(); so uca_org_t does not use destructor to delete contained pointers
-
- for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
- {
- uca_org_t & curr_org = sol_list.back();
- curr_org.tag_array2 = arr_temp;
- curr_org.data_array2 = (*miter); //try all combinations of tag and data array
-
- curr_org.find_delay();
- curr_org.find_energy();
- curr_org.find_area();
- curr_org.find_cyc();
-
- //update min values for the entire cache
- cache_min->update_min_values(curr_org);
-
- sol_list.push_back(uca_org_t());
- }
- }
- }
-
- sol_list.pop_back();//delete the last unused node added in the loop above
-
- find_optimal_uca(fin_res, cache_min, sol_list);
-
- sol_list.clear();
-
- for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
- {
- if (*miter != fin_res->data_array2)
- {
- delete *miter;
- }
- }
- data_arr.clear();
-
- for (uint32_t t = 0; t < nthreads; t++)
- {
- delete calc_array[t].data_res;
- delete calc_array[t].tag_res;
- }
-
- delete [] calc_array;
- delete cache_min;
- delete d_min;
- delete t_min;
-}
-
-void update_dvs(uca_org_t *fin_res)
-{
- if(fin_res->tag_array2 || fin_res->data_array2)
- {
-// Wire::print_wire();
- Wire winit;//init before changing dvs
-// fin_res->uca_q = vector(g_ip->dvs_voltage.size());
- for (unsigned int i=0; i< g_ip->dvs_voltage.size(); i++)
- {
-
- fin_res->uca_q.push_back(new uca_org_t());
-
- g_ip->hp_Vdd = g_ip->dvs_voltage[i];
- g_ip->specific_hp_vdd = true;
- g_ip->lstp_Vdd = g_ip->dvs_voltage[i];
- g_ip->specific_lstp_vdd = true;
- g_ip->lop_Vdd = g_ip->dvs_voltage[i];
- g_ip->specific_lop_vdd = true;
-// g_ip->power_gating = false;
-// g_ip->bitline_floating = false;
-// g_ip->wl_power_gated = false;
-// g_ip->interconect_power_gated = false;
-// g_ip->cl_power_gated = false;
-// g_ip->array_power_gated = false;
-
- init_tech_params(g_ip->F_sz_um,true);
- winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false);
-// Wire::print_wire();
-
- if(fin_res->tag_array2)
- {
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(tag_arr_dyn_p.is_valid)
- {
-
- UCA * tag_arr = new UCA(tag_arr_dyn_p);
- fin_res->uca_q[i]->tag_array2 = new mem_array();
-
- collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, fin_res->uca_q[i]->tag_array2, g_ip->is_main_mem);
- delete tag_arr;
- }
-
- }
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(data_arr_dyn_p.is_valid)
- {
- UCA * data_arr = new UCA(data_arr_dyn_p);
- fin_res->uca_q[i]->data_array2 = new mem_array();
- collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_q[i]->data_array2, g_ip->is_main_mem);
- delete data_arr;
- }
-
- fin_res->uca_q[i]->find_delay();
- fin_res->uca_q[i]->find_energy();
- fin_res->uca_q[i]->find_area();
- fin_res->uca_q[i]->find_cyc();
-
-// output_UCA(fin_res->uca_q[i]);
-// Wire::print_wire();
- }
- //reset input to original values in *.cfg file
- g_ip->specific_hp_vdd = false;
- g_ip->specific_lstp_vdd = false;
- g_ip->specific_lop_vdd = false;
- init_tech_params(g_ip->F_sz_um,true);
- }
- else
- {
- cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl;
- exit(1);
- }
-}
-
-void update_pg(uca_org_t *fin_res)
-{
-
- if(fin_res->tag_array2 || fin_res->data_array2)
- {
- Wire winit;
- fin_res->uca_pg_reference = new uca_org_t();
- /*
- if (i == 0) {g_ip->hp_Vdd = 0.8; }
- else g_ip->hp_Vdd = 1.1;
- g_ip->specific_hp_vdd = true;
- cout<<"VDD=====" << g_ip->hp_Vdd <F_sz_um,true);
- winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false);
- Wire::print_wire();
- */
- g_ip->array_power_gated = false;
- g_ip->bitline_floating = false;
- g_ip->wl_power_gated = false;
- g_ip->cl_power_gated = false;
- g_ip->interconect_power_gated = false;
- g_ip->power_gating = false;
-// winit.wire_dvs_update();
-// Wire::print_wire();
-// init_tech_params(g_ip->F_sz_um,true);
-// winit.wire_dvs_update();//Wire::wire_dvs_update();//Wire winit (1,1, false);
-// Wire::print_wire();
- if(fin_res->tag_array2)
- {
- // init_tech_params(g_ip->F_sz_um,true);
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(tag_arr_dyn_p.is_valid)
- {
-
- UCA * tag_arr = new UCA(tag_arr_dyn_p);
- fin_res->uca_pg_reference->tag_array2 = new mem_array();
-
- collect_uca_results(fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->deg_bl_muxing, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, tag_arr, fin_res->uca_pg_reference->tag_array2, g_ip->is_main_mem);
- delete tag_arr;
-
- }
-
- }
- // init_tech_params(g_ip->F_sz_um,false);
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(data_arr_dyn_p.is_valid)
- {
- UCA * data_arr = new UCA(data_arr_dyn_p);
- fin_res->uca_pg_reference->data_array2 = new mem_array();
- collect_uca_results(fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->deg_bl_muxing, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, data_arr, fin_res->uca_pg_reference->data_array2, g_ip->is_main_mem);
- delete data_arr;
- }
-
- fin_res->uca_pg_reference->find_delay();
- fin_res->uca_pg_reference->find_energy();
- fin_res->uca_pg_reference->find_area();
- fin_res->uca_pg_reference->find_cyc();
-
-// output_UCA(fin_res->uca_pg_reference);
-// Wire::print_wire();
- }
- else
- {
- cout << "ERROR: Cannot retrieve array structure for tag and data array" << endl;
- exit(1);
- }
- //reset input to original values in *.cfg file
- g_ip->array_power_gated = true;
- g_ip->bitline_floating = true;
- g_ip->wl_power_gated = true;
- g_ip->cl_power_gated = true;
- g_ip->interconect_power_gated = true;
- g_ip->power_gating = true;
-
-
-}
-
-/* update for thermal
-void update(uca_org_t *fin_res)
-{
- if(fin_res->tag_array2)
- {
- init_tech_params(g_ip->F_sz_um,true);
- DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(tag_arr_dyn_p.is_valid)
- {
- UCA * tag_arr = new UCA(tag_arr_dyn_p);
- fin_res->tag_array2->power = tag_arr->power;
- }
- else
- {
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
- exit(1);
- }
- }
- init_tech_params(g_ip->F_sz_um,false);
- DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
- if(data_arr_dyn_p.is_valid)
- {
- UCA * data_arr = new UCA(data_arr_dyn_p);
- fin_res->data_array2->power = data_arr->power;
- }
- else
- {
- cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
- exit(1);
- }
-
- fin_res->find_energy();
-}
-*/
diff --git a/cacti/arbiter.cc b/cacti/arbiter.cc
deleted file mode 100644
index 6664abf..0000000
--- a/cacti/arbiter.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "arbiter.h"
-
-Arbiter::Arbiter(
- double n_req,
- double flit_size_,
- double output_len,
- TechnologyParameter::DeviceType *dt
- ):R(n_req), flit_size(flit_size_),
- o_len (output_len), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- double technology = g_ip->F_sz_um;
- NTn1 = 13.5*technology/2;
- PTn1 = 76*technology/2;
- NTn2 = 13.5*technology/2;
- PTn2 = 76*technology/2;
- NTi = 12.5*technology/2;
- PTi = 25*technology/2;
- NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology/2; /* pmos tr. length*/
-}
-
-Arbiter::~Arbiter(){}
-
-double
-Arbiter::arb_req() {
- double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
- gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
- return temp;
-}
-
-double
-Arbiter::arb_pri() {
- double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
- of flip-flop is ignored */
- return temp;
-}
-
-
-double
-Arbiter::arb_grant() {
- double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
- return temp;
-}
-
-double
-Arbiter::arb_int() {
- double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
- 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
- return temp;
-}
-
-void
-Arbiter::compute_power() {
- power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
- arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
- double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
- double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
- double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
- power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
- power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
-}
-
-double //wire cap with triple spacing
-Arbiter::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- double temp = (wc.wire_cap(length,true));
- return temp;
-}
-
-double
-Arbiter::crossbar_ctrline() {
- double temp = (Cw3(o_len * 1e-6 /* m */) +
- drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
- gate_C(NTi, 0) + gate_C(PTi, 0));
- return temp;
-}
-
-double
-Arbiter::transmission_buf_ctrcap() {
- double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
- return temp;
-}
-
-
-void Arbiter::print_arbiter()
-{
- cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
-}
-
-
diff --git a/cacti/bank.cc b/cacti/bank.cc
deleted file mode 100755
index 74b2c6d..0000000
--- a/cacti/bank.cc
+++ /dev/null
@@ -1,211 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include "bank.h"
-#include
-
-
-Bank::Bank(const DynamicParameter & dyn_p):
- dp(dyn_p), mat(dp),
- num_addr_b_mat(dyn_p.number_addr_bits_mat),
- num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir),
- array_leakage(0),
- wl_leakage(0),
- cl_leakage(0)
-{
- int RWP;
- int ERP;
- int EWP;
- int SCHP;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- int searchinbits;
- int searchoutbits;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
- datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
- dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
- searchinbits = dp.num_si_b_bank_per_port * SCHP;
- searchoutbits = dp.num_so_b_bank_per_port * SCHP;
- }
-
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- if (g_ip->fast_access && dp.is_tag == false)
- {
- dataoutbits *= g_ip->data_assoc;
- }
-
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
-
-// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
-// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
- else
- {
- htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
- htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
- htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
- htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
- htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
- total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
- }
-
- num_addr_b_row_dec = _log2(mat.subarray.num_rows);
- num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
- num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
-}
-
-
-
-Bank::~Bank()
-{
- delete htree_in_add;
- delete htree_out_data;
- delete htree_in_data;
- if (dp.fully_assoc || dp.pure_cam)
- {
- delete htree_in_search;
- delete htree_out_search;
- }
-}
-
-
-
-double Bank::compute_delays(double inrisetime)
-{
- return mat.compute_delays(inrisetime);
-}
-
-
-
-void Bank::compute_power_energy()
-{
- mat.compute_power_energy();
-
- if (!(dp.fully_assoc || dp.pure_cam))
- {
- power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
- power.readOp.power_gated_leakage += mat.power.readOp.power_gated_leakage * dp.num_mats;
-
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
-
- array_leakage += mat.array_leakage*dp.num_mats;
- wl_leakage += mat.wl_leakage*dp.num_mats;
- cl_leakage += mat.cl_leakage*dp.num_mats;
-
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
-
- power.readOp.power_gated_leakage += htree_in_add->power.readOp.power_gated_leakage;
- power.readOp.power_gated_leakage += htree_in_data->power.readOp.power_gated_leakage;
- power.readOp.power_gated_leakage += htree_out_data->power.readOp.power_gated_leakage;
-
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- }
- else
- {
-
- power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
- power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
- power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
-
- power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
- power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
- mat.power_sa.searchOp.dynamic +
- mat.power_bitline.searchOp.dynamic +
- mat.power_subarray_out_drv.searchOp.dynamic+
- mat.ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
- power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
-
- power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
- power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
-
- power.readOp.leakage += htree_in_add->power.readOp.leakage;
- power.readOp.leakage += htree_in_data->power.readOp.leakage;
- power.readOp.leakage += htree_out_data->power.readOp.leakage;
- power.readOp.leakage += htree_in_search->power.readOp.leakage;
- power.readOp.leakage += htree_out_search->power.readOp.leakage;
-
-
- power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
- power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
-
- }
-
-}
-
diff --git a/cacti/basic_circuit.cc b/cacti/basic_circuit.cc
deleted file mode 100644
index b81f8bf..0000000
--- a/cacti/basic_circuit.cc
+++ /dev/null
@@ -1,942 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-
-#include "basic_circuit.h"
-#include "parameter.h"
-#include
-#include
-#include
-
-uint32_t _log2(uint64_t num)
-{
- uint32_t log2 = 0;
-
- if (num == 0)
- {
- std::cerr << "log0?" << std::endl;
- exit(1);
- }
-
- while (num > 1)
- {
- num = (num >> 1);
- log2++;
- }
-
- return log2;
-}
-
-
-bool is_pow2(int64_t val)
-{
- if (val <= 0)
- {
- return false;
- }
- else if (val == 1)
- {
- return true;
- }
- else
- {
- return (_log2(val) != _log2(val-1));
- }
-}
-
-
-int powers (int base, int n)
-{
- int i, p;
-
- p = 1;
- for (i = 1; i <= n; ++i)
- p *= base;
- return p;
-}
-
-/*----------------------------------------------------------------------*/
-
-double logtwo (double x)
-{
- assert(x > 0);
- return ((double) (log (x) / log (2.0)));
-}
-
-/*----------------------------------------------------------------------*/
-
-
-double gate_C(
- double width,
- double wirelength,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if (_is_dram && _is_cell)
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if (_is_dram && _is_wl_tr)
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if (!_is_dram && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
-}
-
-
-// returns gate capacitance in Farads
-// actually this function is the same as gate_C() now
-double gate_C_pass(
- double width, // gate width in um (length is Lphy_periph_global)
- double wirelength, // poly wire length going to gate in lambda
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- // v5.0
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
-}
-
-
-
-double drain_C_(
- double width,
- int nchannel,
- int stack,
- int next_arg_thresh_folding_width_or_height_cell,
- double fold_dimension,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- double w_folded_tr;
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; // DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; // DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double c_junc_area = dt->C_junc;
- double c_junc_sidewall = dt->C_junc_sidewall;
- double c_fringe = 2*dt->C_fringe;
- double c_overlap = 2*dt->C_overlap;
- double drain_C_metal_connecting_folded_tr = 0;
-
- // determine the width of the transistor after folding (if it is getting folded)
- if (next_arg_thresh_folding_width_or_height_cell == 0)
- { // interpret fold_dimension as the the folding width threshold
- // i.e. the value of transistor width above which the transistor gets folded
- w_folded_tr = fold_dimension;
- }
- else
- { // interpret fold_dimension as the height of the cell that this transistor is part of.
- double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
- // TODO : w_folded_tr must come from Component::compute_gate_area()
- double ratio_p_to_n = 2.0 / (2.0 + 1.0);
- if (nchannel)
- {
- w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
- }
- else
- {
- w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
- }
- }
- int num_folded_tr = (int) (ceil(width / w_folded_tr));
-
- if (num_folded_tr < 2)
- {
- w_folded_tr = width;
- }
-
- double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
- (stack - 1) * g_tp.spacing_poly_to_poly;
- double drain_h_for_sidewall = w_folded_tr;
- double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
- if (num_folded_tr > 1)
- {
- total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
- (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
-
- if (num_folded_tr%2 == 0)
- {
- drain_h_for_sidewall = 0;
- }
- total_drain_height_for_cap_wrt_gate *= num_folded_tr;
- drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
- }
-
- double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
- double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
- double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
-
- return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
-}
-
-
-double tr_R_on(
- double width,
- int nchannel,
- int stack,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && _is_cell)
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (stack * restrans / width);
-}
-
-
-/* This routine operates in reverse: given a resistance, it finds
- * the transistor width that would have this R. It is used in the
- * data wordline to estimate the wordline driver size. */
-
-// returns width in um
-double R_to_w(
- double res,
- int nchannel,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- const TechnologyParameter::DeviceType * dt;
-
- if ((_is_dram) && (_is_cell))
- {
- dt = &g_tp.dram_acc; //DRAM cell access transistor
- }
- else if ((_is_dram) && (_is_wl_tr))
- {
- dt = &g_tp.dram_wl; //DRAM wordline transistor
- }
- else if ((!_is_dram) && (_is_cell))
- {
- dt = &g_tp.sram_cell; // SRAM cell access transistor
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- {
- dt = &g_tp.peri_global;
- }
-
- double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
- return (restrans / res);
-}
-
-
-double pmos_to_nmos_sz_ratio(
- bool _is_dram,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- double p_to_n_sizing_ratio;
- if ((_is_dram) && (_is_wl_tr))
- { //DRAM wordline transistor
- p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
- }
- else if (_is_sleep_tx)
- {
- p_to_n_sizing_ratio = g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
- }
- return p_to_n_sizing_ratio;
-}
-
-
-// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
-double horowitz(
- double inputramptime, // input rise time
- double tf, // time constant of gate
- double vs1, // threshold voltage1/Vdd
- double vs2, // threshold voltage2/vdd
- int rise) // whether input rises or fall
-{
- if (inputramptime == 0 && vs1 == vs2)
- {
- return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
- }
- double a, b, td;
-
- a = inputramptime / tf;
- if (rise == RISE)
- {
- b = 0.5;
- td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
- }
- else
- {
- b = 0.4;
- td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
- }
- return (td);
-}
-
-double cmos_Ileak(
- double nWidth,
- double pWidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
-}
-
-int factorial(int n, int m)
-{
- int fa = m, i;
- for (i=m+1; i<=n; i++)
- fa *=i;
- return fa;
-}
-
-int combination(int n, int m)
-{
- int ret;
- ret = factorial(n, m+1) / factorial(n - m);
- return ret;
-}
-
-double simplified_nmos_Isat(
- double nwidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nwidth * dt->I_on_n;
-}
-
-double simplified_pmos_Isat(
- double pwidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pwidth * dt->I_on_n/dt->n_to_p_eff_curr_drv_ratio;
-}
-
-
-double simplified_nmos_leakage(
- double nwidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nwidth * dt->I_off_n;
-}
-
-double simplified_pmos_leakage(
- double pwidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pwidth * dt->I_off_p;
-}
-
-double cmos_Ig_n(
- double nWidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return nWidth*dt->I_g_on_n;
-}
-
-double cmos_Ig_p(
- double pWidth,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx)
-{
- TechnologyParameter::DeviceType * dt;
-
- if ((!_is_dram)&&(_is_cell))
- { //SRAM cell access transistor
- dt = &(g_tp.sram_cell);
- }
- else if ((_is_dram)&&(_is_wl_tr))
- { //DRAM wordline transistor
- dt = &(g_tp.dram_wl);
- }
- else if (_is_sleep_tx)
- {
- dt = &g_tp.sleep_tx; // Sleep transistor
- }
- else
- { //DRAM or SRAM all other transistors
- dt = &(g_tp.peri_global);
- }
- return pWidth*dt->I_g_on_p;
-}
-
-double cmos_Isub_leakage(
- double nWidth,
- double pWidth,
- int fanin,
- enum Gate_type g_type,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
- double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
- double Isub=0;
- int num_states;
- int num_off_tx;
-
- num_states = int(pow(2.0, fanin));
-
- switch (g_type)
- {
- case nmos:
- if (fanin==1)
- {
- Isub = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
- }
-
- }
- break;
- case pmos:
- if (fanin==1)
- {
- Isub = pmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
- }
- else
- {
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
- }
-
- }
- break;
- case inv:
- Isub = (nmos_leak + pmos_leak)/2;
- break;
- case nand:
- Isub += fanin*pmos_leak;//the pullup network
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
- {
- //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub /=num_states;
- break;
- case nor:
- for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
- {
- //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
- Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
- }
- Isub += fanin*nmos_leak;//the pulldown network
- Isub /=num_states;
- break;
- case tri:
- Isub += (nmos_leak + pmos_leak)/2;//enabled
- Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
- Isub /=2;
- break;
- case tg:
- Isub = (nmos_leak + pmos_leak)/2;
- break;
- default:
- assert(0);
- break;
- }
-
- return Isub;
-}
-
-
-double cmos_Ig_leakage(
- double nWidth,
- double pWidth,
- int fanin,
- enum Gate_type g_type,
- bool _is_dram,
- bool _is_cell,
- bool _is_wl_tr,
- bool _is_sleep_tx,
- enum Half_net_topology topo)
-{
- assert (fanin>=1);
- double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
- double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
- double Ig_on=0;
- int num_states;
- int num_on_tx;
-
- num_states = int(pow(2.0, fanin));
-
- switch (g_type)
- {
- case nmos:
- if (fanin==1)
- {
- Ig_on = nmos_leak/num_states;
- }
- else
- {
- if (topo==parallel)
- {
- for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
- {
- Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
- }
- }
- else
- {
- Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
- //num_on_tx is the number of on tx
- for (num_on_tx=1; num_on_tx
-#include
-#include
-#include
-#include
-#include "const.h"
-
-using namespace std;
-
-
-class min_values_t;
-class mem_array;
-class uca_org_t;
-
-
-class powerComponents
-{
- public:
- double dynamic;
- double leakage;
- double gate_leakage;
- double short_circuit;
- double longer_channel_leakage;
- double power_gated_leakage;
- double power_gated_with_long_channel_leakage;
-
- powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0),
- longer_channel_leakage(0), power_gated_leakage(0),
- power_gated_with_long_channel_leakage (0) { }
- powerComponents(const powerComponents & obj) { *this = obj; }
- powerComponents & operator=(const powerComponents & rhs)
- {
- dynamic = rhs.dynamic;
- leakage = rhs.leakage;
- gate_leakage = rhs.gate_leakage;
- short_circuit = rhs.short_circuit;
- longer_channel_leakage = rhs.longer_channel_leakage;
- power_gated_leakage = rhs.power_gated_leakage;
- power_gated_with_long_channel_leakage = rhs.power_gated_with_long_channel_leakage;
- return *this;
- }
- void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;
- longer_channel_leakage = 0; power_gated_leakage = 0;power_gated_with_long_channel_leakage=0;}
-
- friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
- friend powerComponents operator*(const powerComponents & x, double const * const y);
-};
-
-
-
-class powerDef
-{
- public:
- powerComponents readOp;
- powerComponents writeOp;
- powerComponents searchOp;//Sheng: for CAM and FA
-
- powerDef() : readOp(), writeOp(), searchOp() { }
- void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
-
- friend powerDef operator+(const powerDef & x, const powerDef & y);
- friend powerDef operator*(const powerDef & x, double const * const y);
-};
-
-enum Wire_type
-{
- Global /* gloabl wires with repeaters */,
- Global_5 /* 5% delay penalty */,
- Global_10 /* 10% delay penalty */,
- Global_20 /* 20% delay penalty */,
- Global_30 /* 30% delay penalty */,
- Low_swing /* differential low power wires with high area overhead */,
- Semi_global /* mid-level wires with repeaters*/,
- Transmission /* tranmission lines with high area overhead */,
- Optical /* optical wires */,
- Invalid_wtype
-};
-
-
-
-class InputParameter
-{
- public:
-
- InputParameter();
- void parse_cfg(const string & infile);
-
- bool error_checking(); // return false if the input parameters are problematic
- void display_ip();
-
- unsigned int cache_sz; // in bytes
- unsigned int line_sz;
- unsigned int assoc;
- unsigned int nbanks;
- unsigned int out_w;// == nr_bits_out
- bool specific_tag;
- unsigned int tag_w;
- unsigned int access_mode;
- unsigned int obj_func_dyn_energy;
- unsigned int obj_func_dyn_power;
- unsigned int obj_func_leak_power;
- unsigned int obj_func_cycle_t;
-
- double F_sz_nm; // feature size in nm
- double F_sz_um; // feature size in um
- bool specific_hp_vdd; // whether to have user defined vdd that is different from ITRS
- double hp_Vdd; // user specified vdd
- bool specific_lstp_vdd; // whether to have user defined vdd that is different from ITRS
- double lstp_Vdd;
- bool specific_lop_vdd; // whether to have user defined vdd that is different from ITRS
- double lop_Vdd;
- bool specific_vcc_min; // whether to have user defined vcc_min for power-gating that is different from the value constrained by technology for maintaining states
- double user_defined_vcc_min;
- bool user_defined_vcc_underflow; //flag to indicate when user defined vcc is too low for the circuit to retain state
- unsigned int num_rw_ports;
- unsigned int num_rd_ports;
- unsigned int num_wr_ports;
- unsigned int num_se_rd_ports; // number of single ended read ports
- unsigned int num_search_ports; // number of search ports for CAM
- bool is_main_mem;
- bool is_cache;
- bool pure_ram;
- bool pure_cam;
- bool rpters_in_htree; // if there are repeaters in htree segment
- unsigned int ver_htree_wires_over_array;
- unsigned int broadcast_addr_din_over_ver_htrees;
- unsigned int temp;
-
- unsigned int ram_cell_tech_type;
- unsigned int peri_global_tech_type;
- unsigned int data_arr_ram_cell_tech_type;
- unsigned int data_arr_peri_global_tech_type;
- unsigned int tag_arr_ram_cell_tech_type;
- unsigned int tag_arr_peri_global_tech_type;
-
- unsigned int burst_len;
- unsigned int int_prefetch_w;
- unsigned int page_sz_bits;
-
- unsigned int ic_proj_type; // interconnect_projection_type
- unsigned int wire_is_mat_type; // wire_inside_mat_type
- unsigned int wire_os_mat_type; // wire_outside_mat_type
- enum Wire_type wt;
- int force_wiretype;
- bool print_input_args;
- unsigned int nuca_cache_sz; // TODO
- int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
- bool force_cache_config;
-
- int cache_level;
- int cores;
- int nuca_bank_count;
- int force_nuca_bank;
-
- int delay_wt, dynamic_power_wt, leakage_power_wt,
- cycle_time_wt, area_wt;
- int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
- cycle_time_wt_nuca, area_wt_nuca;
-
- int delay_dev, dynamic_power_dev, leakage_power_dev,
- cycle_time_dev, area_dev;
- int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
- cycle_time_dev_nuca, area_dev_nuca;
- int ed; //ED or ED2 optimization
- int nuca;
-
- bool fast_access;
- unsigned int block_sz; // bytes
- unsigned int tag_assoc;
- unsigned int data_assoc;
- bool is_seq_acc;
- bool fully_assoc;
- unsigned int nsets; // == number_of_sets
- int print_detail;
-
-
- bool add_ecc_b_;
- //parameters for design constraint
- double throughput;
- double latency;
- bool pipelinable;
- int pipeline_stages;
- int per_stage_vector;
- bool with_clock_grid;
-
- bool array_power_gated;
- bool bitline_floating;
- bool wl_power_gated;
- bool cl_power_gated;
- bool interconect_power_gated;
- bool power_gating;
-
- double perfloss;
-
- bool cl_vertical;
-
- std::vector dvs_voltage;
-
- bool long_channel_device;
-};
-
-
-typedef struct{
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- int number_activated_mats_horizontal_direction;
- int number_subbanks;
- int page_size_in_bits;
- double delay_route_to_bank;
- double delay_crossbar;
- double delay_addr_din_horizontal_htree;
- double delay_addr_din_vertical_htree;
- double delay_row_predecode_driver_and_block;
- double delay_row_decoder;
- double delay_bitlines;
- double delay_sense_amp;
- double delay_subarray_output_driver;
- double delay_bit_mux_predecode_driver_and_block;
- double delay_bit_mux_decoder;
- double delay_senseamp_mux_lev_1_predecode_driver_and_block;
- double delay_senseamp_mux_lev_1_decoder;
- double delay_senseamp_mux_lev_2_predecode_driver_and_block;
- double delay_senseamp_mux_lev_2_decoder;
- double delay_input_htree;
- double delay_output_htree;
- double delay_dout_vertical_htree;
- double delay_dout_horizontal_htree;
- double delay_comparator;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double delay_request_network;
- double delay_inside_mat;
- double delay_reply_network;
- double trcd;
- double cas_latency;
- double precharge_delay;
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_addr_horizontal_htree;
- powerDef power_datain_horizontal_htree;
- powerDef power_dataout_horizontal_htree;
- powerDef power_addr_vertical_htree;
- powerDef power_datain_vertical_htree;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
- powerDef power_crossbar;
- powerDef total_power;
- double area;
- double all_banks_height;
- double all_banks_width;
- double bank_height;
- double bank_width;
- double subarray_memory_cell_area_height;
- double subarray_memory_cell_area_width;
- double mat_height;
- double mat_width;
- double routing_area_height_within_bank;
- double routing_area_width_within_bank;
- double area_efficiency;
-// double perc_power_dyn_routing_to_bank;
-// double perc_power_dyn_addr_horizontal_htree;
-// double perc_power_dyn_datain_horizontal_htree;
-// double perc_power_dyn_dataout_horizontal_htree;
-// double perc_power_dyn_addr_vertical_htree;
-// double perc_power_dyn_datain_vertical_htree;
-// double perc_power_dyn_row_predecoder_drivers;
-// double perc_power_dyn_row_predecoder_blocks;
-// double perc_power_dyn_row_decoders;
-// double perc_power_dyn_bit_mux_predecoder_drivers;
-// double perc_power_dyn_bit_mux_predecoder_blocks;
-// double perc_power_dyn_bit_mux_decoders;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_1_decoders;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_dyn_senseamp_mux_lev_2_decoders;
-// double perc_power_dyn_bitlines;
-// double perc_power_dyn_sense_amps;
-// double perc_power_dyn_prechg_eq_drivers;
-// double perc_power_dyn_subarray_output_drivers;
-// double perc_power_dyn_dataout_vertical_htree;
-// double perc_power_dyn_comparators;
-// double perc_power_dyn_crossbar;
-// double perc_power_dyn_spent_outside_mats;
-// double perc_power_leak_routing_to_bank;
-// double perc_power_leak_addr_horizontal_htree;
-// double perc_power_leak_datain_horizontal_htree;
-// double perc_power_leak_dataout_horizontal_htree;
-// double perc_power_leak_addr_vertical_htree;
-// double perc_power_leak_datain_vertical_htree;
-// double perc_power_leak_row_predecoder_drivers;
-// double perc_power_leak_row_predecoder_blocks;
-// double perc_power_leak_row_decoders;
-// double perc_power_leak_bit_mux_predecoder_drivers;
-// double perc_power_leak_bit_mux_predecoder_blocks;
-// double perc_power_leak_bit_mux_decoders;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_1_decoders;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
-// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
-// double perc_power_leak_senseamp_mux_lev_2_decoders;
-// double perc_power_leak_bitlines;
-// double perc_power_leak_sense_amps;
-// double perc_power_leak_prechg_eq_drivers;
-// double perc_power_leak_subarray_output_drivers;
-// double perc_power_leak_dataout_vertical_htree;
-// double perc_power_leak_comparators;
-// double perc_power_leak_crossbar;
-// double perc_leak_mats;
-// double perc_active_mats;
- double refresh_power;
- double dram_refresh_period;
- double dram_array_availability;
- double dyn_read_energy_from_closed_page;
- double dyn_read_energy_from_open_page;
- double leak_power_subbank_closed_page;
- double leak_power_subbank_open_page;
- double leak_power_request_and_reply_networks;
- double activate_energy;
- double read_energy;
- double write_energy;
- double precharge_energy;
-} results_mem_array;
-
-
-class uca_org_t
-{
- public:
- mem_array * tag_array2;
- mem_array * data_array2;
- double access_time;
- double cycle_time;
- double area;
- double area_efficiency;
- powerDef power;
- double leak_power_with_sleep_transistors_in_mats;
- double cache_ht;
- double cache_len;
- char file_n[100];
- double vdd_periph_global;
- bool valid;
- results_mem_array tag_array;
- results_mem_array data_array;
- std::vector uca_q;//for results share the same settings (g_ip and dyn_p) but with different tech settings such as DVFS
- uca_org_t * uca_pg_reference;//for references results when power gating is enabled.
- uca_org_t();
- void find_delay();
- void find_energy();
- void find_area();
- void find_cyc();
- void adjust_area();//for McPAT only to adjust routing overhead
- void cleanup();
- ~uca_org_t();
-};
-
-void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
-
-uca_org_t cacti_interface(const string & infile_name);
-//McPAT's plain interface, please keep !!!
-uca_org_t cacti_interface(InputParameter * const local_interface);
-//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter * const local_interface);
-//McPAT's plain interface, please keep !!!
-uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,
- int excl_write_ports,
- int single_ended_read_ports,
- int search_ports,
- int banks,
- double tech_node,
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode,
- int cache,
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_cycle_time,
- int obj_func_area,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area,
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in,
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in,
- int wire_inside_mat_type_in,
- int wire_outside_mat_type_in,
- int REPEATERS_IN_HTREE_SEGMENTS_in,
- int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
- int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
- int PAGE_SIZE_BITS_in,
- int BURST_LENGTH_in,
- int INTERNAL_PREFETCH_WIDTH_in,
- int force_wiretype,
- int wiretype,
- int force_config,
- int ndwl,
- int ndbl,
- int nspd,
- int ndcm,
- int ndsam1,
- int ndsam2,
- int ecc);
-// int cache_size,
-// int line_size,
-// int associativity,
-// int rw_ports,
-// int excl_read_ports,
-// int excl_write_ports,
-// int single_ended_read_ports,
-// int banks,
-// double tech_node,
-// int output_width,
-// int specific_tag,
-// int tag_width,
-// int access_mode,
-// int cache,
-// int main_mem,
-// int obj_func_delay,
-// int obj_func_dynamic_power,
-// int obj_func_leakage_power,
-// int obj_func_area,
-// int obj_func_cycle_time,
-// int dev_func_delay,
-// int dev_func_dynamic_power,
-// int dev_func_leakage_power,
-// int dev_func_area,
-// int dev_func_cycle_time,
-// int temp,
-// int data_arr_ram_cell_tech_flavor_in,
-// int data_arr_peri_global_tech_flavor_in,
-// int tag_arr_ram_cell_tech_flavor_in,
-// int tag_arr_peri_global_tech_flavor_in,
-// int interconnect_projection_type_in,
-// int wire_inside_mat_type_in,
-// int wire_outside_mat_type_in,
-// int REPEATERS_IN_HTREE_SEGMENTS_in,
-// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
-// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
-//// double MAXAREACONSTRAINT_PERC_in,
-//// double MAXACCTIMECONSTRAINT_PERC_in,
-//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
-// int PAGE_SIZE_BITS_in,
-// int BURST_LENGTH_in,
-// int INTERNAL_PREFETCH_WIDTH_in);
-
-//Naveen's interface
-uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,
- int excl_write_ports,
- int single_ended_read_ports,
- int banks,
- double tech_node,
- int page_sz,
- int burst_length,
- int pre_width,
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode, //0 normal, 1 seq, 2 fast
- int cache, //scratch ram or cache
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_area,
- int obj_func_cycle_time,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area,
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in,
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
- int wire_inside_mat_type_in,
- int wire_outside_mat_type_in,
- int is_nuca, // 0 - UCA, 1 - NUCA
- int core_count,
- int cache_level, // 0 - L2, 1 - L3
- int nuca_bank_count,
- int nuca_obj_func_delay,
- int nuca_obj_func_dynamic_power,
- int nuca_obj_func_leakage_power,
- int nuca_obj_func_area,
- int nuca_obj_func_cycle_time,
- int nuca_dev_func_delay,
- int nuca_dev_func_dynamic_power,
- int nuca_dev_func_leakage_power,
- int nuca_dev_func_area,
- int nuca_dev_func_cycle_time,
- int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
- int p_input);
-
-class mem_array
-{
- public:
- int Ndcm;
- int Ndwl;
- int Ndbl;
- double Nspd;
- int deg_bl_muxing;
- int Ndsam_lev_1;
- int Ndsam_lev_2;
- double access_time;
- double cycle_time;
- double multisubbank_interleave_cycle_time;
- double area_ram_cells;
- double area;
- powerDef power;
- double delay_senseamp_mux_decoder;
- double delay_before_subarray_output_driver;
- double delay_from_subarray_output_driver_to_output;
- double height;
- double width;
-
- double mat_height;
- double mat_length;
- double subarray_length;
- double subarray_height;
-
- double delay_route_to_bank,
- delay_input_htree,
- delay_row_predecode_driver_and_block,
- delay_row_decoder,
- delay_bitlines,
- delay_sense_amp,
- delay_subarray_output_driver,
- delay_dout_htree,
- delay_comparator,
- delay_matchlines;
-
- double all_banks_height,
- all_banks_width,
- area_efficiency;
-
- powerDef power_routing_to_bank;
- powerDef power_addr_input_htree;
- powerDef power_data_input_htree;
- powerDef power_data_output_htree;
- powerDef power_htree_in_search;
- powerDef power_htree_out_search;
- powerDef power_row_predecoder_drivers;
- powerDef power_row_predecoder_blocks;
- powerDef power_row_decoders;
- powerDef power_bit_mux_predecoder_drivers;
- powerDef power_bit_mux_predecoder_blocks;
- powerDef power_bit_mux_decoders;
- powerDef power_senseamp_mux_lev_1_predecoder_drivers;
- powerDef power_senseamp_mux_lev_1_predecoder_blocks;
- powerDef power_senseamp_mux_lev_1_decoders;
- powerDef power_senseamp_mux_lev_2_predecoder_drivers;
- powerDef power_senseamp_mux_lev_2_predecoder_blocks;
- powerDef power_senseamp_mux_lev_2_decoders;
- powerDef power_bitlines;
- powerDef power_sense_amps;
- powerDef power_prechg_eq_drivers;
- powerDef power_output_drivers_at_subarray;
- powerDef power_dataout_vertical_htree;
- powerDef power_comparators;
-
- powerDef power_cam_bitline_precharge_eq_drv;
- powerDef power_searchline;
- powerDef power_searchline_precharge;
- powerDef power_matchlines;
- powerDef power_matchline_precharge;
- powerDef power_matchline_to_wordline_drv;
-
- min_values_t *arr_min;
- enum Wire_type wt;
-
- // dram stats
- double activate_energy, read_energy, write_energy, precharge_energy,
- refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
- leak_power_request_and_reply_networks;
-
- double precharge_delay;
-
- //Power-gating stats
- double array_leakage;
- double wl_leakage;
- double cl_leakage;
-
- double sram_sleep_tx_width, wl_sleep_tx_width, cl_sleep_tx_width;
- double sram_sleep_tx_area, wl_sleep_tx_area, cl_sleep_tx_area;
- double sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, cl_sleep_wakeup_latency, bl_floating_wakeup_latency;
- double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, cl_sleep_wakeup_energy, bl_floating_wakeup_energy;
-
- int num_active_mats;
- int num_submarray_mats;
-
- double long_channel_leakage_reduction_periperal;
- double long_channel_leakage_reduction_memcell;
-
- static bool lt(const mem_array * m1, const mem_array * m2);
-};
-
-
-#endif
diff --git a/cacti/crossbar.cc b/cacti/crossbar.cc
deleted file mode 100644
index d7386a8..0000000
--- a/cacti/crossbar.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "crossbar.h"
-
-#define ASPECT_THRESHOLD .8
-#define ADJ 1
-
-Crossbar::Crossbar(
- double n_inp_,
- double n_out_,
- double flit_size_,
- TechnologyParameter::DeviceType *dt
- ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
-{
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- Vdd = dt->Vdd;
- CB_ADJ = 1;
-}
-
-Crossbar::~Crossbar(){}
-
-double Crossbar::output_buffer()
-{
-
- //Wire winit(4, 4);
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire w1(g_ip->wt, l_eff);
- //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
- double s1 = w1.repeater_size * (l_eff n_to_p_eff_curr_drv_ratio;
- // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
- TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- TriS2 = s1; //driver transistor
-
- if (TriS1 < 1)
- TriS1 = 1;
-
- double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
- gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
-// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
-// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
-// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
-// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
-// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
-// gate_C(TriS2*min_w_pmos, 0);
- tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(TriS2*g_tp.min_w_nmos_, 0)+
- drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(TriS2*min_w_pmos, 0);
- double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
- double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
-
- tri_inp_cap = input_cap;
- tri_out_cap = output_cap;
- tri_ctr_cap = ctr_cap;
- return input_cap + output_cap + ctr_cap;
-}
-
-void Crossbar::compute_power()
-{
-
- Wire winit(4, 4);
- double tri_cap = output_buffer();
- assert(tri_cap > 0);
- //area of a tristate logic
- double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
- g_area *= 2; // to model area of output transistors
- g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
- g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
- double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
- // effective no. of tristate buffers that need to be laid side by side
- int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
- double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
- Wire w1(g_ip->wt, wire_len);
-
- area.w = wire_len;
- area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
- Wire w2(g_ip->wt, area.h);
-
- double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
- if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
-
- if (aspect_ratio_cb < ASPECT_THRESHOLD) {
- if (n_out > 2 && n_inp > 2) {
- CB_ADJ+=0.2;
- //cout << "CB ADJ " << CB_ADJ << endl;
- if (CB_ADJ < 4) {
- this->compute_power();
- }
- }
- }
-
-
-
- power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
- power.readOp.leakage = n_inp * n_out * flit_size * (
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.leakage + w2.power.readOp.leakage);
- power.readOp.gate_leakage = n_inp * n_out * flit_size * (
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
- cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
- w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
-
- // delay calculation
- double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
- Wire wdriver(g_ip->wt, l_eff);
- double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
- double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
- delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- Wire wreset();
-}
-
-void Crossbar::print_crossbar()
-{
- cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
- cout << "Flit size : " << flit_size << " bits" << endl;
- cout << "Width : " << area.w << " u" << endl;
- cout << "Height : " << area.h << " u" << endl;
- cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
- cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
- cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
- cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
-}
-
-
diff --git a/cacti/decoder.cc b/cacti/decoder.cc
deleted file mode 100644
index 1f18629..0000000
--- a/cacti/decoder.cc
+++ /dev/null
@@ -1,1699 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include "area.h"
-#include "decoder.h"
-#include "parameter.h"
-#include
-#include
-#include
-
-using namespace std;
-
-
-Decoder::Decoder(
- int _num_dec_signals,
- bool flag_way_select,
- double _C_ld_dec_out,
- double _R_wire_dec_out,
- bool fully_assoc_,
- bool is_dram_,
- bool is_wl_tr_,
- const Area & cell_,
- bool power_gating_,
- int nodes_DSTN_)
-:exist(false),
- C_ld_dec_out(_C_ld_dec_out),
- R_wire_dec_out(_R_wire_dec_out),
- num_gates(0), num_gates_min(2),
- delay(0),
- //power(),
- fully_assoc(fully_assoc_), is_dram(is_dram_),
- is_wl_tr(is_wl_tr_),
- total_driver_nwidth(0),
- total_driver_pwidth(0),
- cell(cell_),
- power_gating(power_gating_),
- nodes_DSTN(nodes_DSTN_),
- sleeptx(NULL)
-{
-
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- w_dec_n[i] = 0;
- w_dec_p[i] = 0;
- }
-
- /*
- * _num_dec_signals is the number of decoded signal as output
- * num_addr_bits_dec is the number of signal to be decoded
- * as the decoders input.
- */
- int num_addr_bits_dec = _log2(_num_dec_signals);
-
- if (num_addr_bits_dec < 4)
- {
- if (flag_way_select)
- {
- exist = true;
- num_in_signals = 2;
- }
- else
- {
- num_in_signals = 0;
- }
- }
- else
- {
- exist = true;
-
- if (flag_way_select)
- {
- num_in_signals = 3;
- }
- else
- {
- num_in_signals = 2;
- }
- }
-
- assert(cell.h>0);
- assert(cell.w>0);
- // the height of a row-decoder-driver cell is fixed to be 4 * cell.h;
- //area.h = 4 * cell.h;
- area.h = g_tp.h_dec * cell.h;
-
- compute_widths();
- compute_area();
-
-}
-
-
-
-void Decoder::compute_widths()
-{
- double F;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
- double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
- double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
-
- if (exist)
- {
- if (num_in_signals == 2 || fully_assoc)
- {
- w_dec_n[0] = 2 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand2;
- }
- else
- {
- w_dec_n[0] = 3 * g_tp.min_w_nmos_;
- w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
- F = gnand3;
- }
-
- F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
- gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
- num_gates = logical_effort(
- num_gates_min,
- num_in_signals == 2 ? gnand2 : gnand3,
- F,
- w_dec_n,
- w_dec_p,
- C_ld_dec_out,
- p_to_n_sz_ratio,
- is_dram,
- is_wl_tr,
- g_tp.max_w_nmos_dec);
-
- }
-}
-
-
-
-void Decoder::compute_area()
-{
- double cumulative_area = 0;
- double cumulative_curr = 0; // cumulative leakage current
- double cumulative_curr_Ig = 0; // cumulative leakage current
-
- if (exist)
- { // First check if this decoder exists
- if (num_in_signals == 2)
- {
- cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
- }
- else if (num_in_signals == 3)
- {
- cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
- cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
- }
-
- for (int i = 1; i < num_gates; i++)
- {
- cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
- cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
- cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
- }
- power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
- power.readOp.power_gated_leakage = cumulative_curr * g_tp.peri_global.Vcc_min;
- power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
-
- area.w = (cumulative_area / area.h);
- if (power_gating)
- {
- compute_power_gating();
- cumulative_area += sleeptx->area.get_area();
- area.w = (cumulative_area / area.h);
- }
- }
-}
-
-void Decoder::compute_power_gating()
-{
- //For all driver chains there is only one sleep transistors to save area
- //Total transistor width for sleep tx calculation
- for (int i = 0; i power_gating)
- sleeptx = new Sleep_tx (g_ip->perfloss,
- Isat_subarray,
- is_footer,
- c_wakeup,
- detalV,
- nodes_DSTN,
- area);
-}
-
-double Decoder::compute_delays(double inrisetime)
-{
- if (exist)
- {
- double ret_val = 0; // outrisetime
- int i;
- double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
- double Vdd = g_tp.peri_global.Vdd;
-
- if ((is_wl_tr) && (is_dram))
- {
- Vpp = g_tp.vpp;
- }
- else if (is_wl_tr)
- {
- Vpp = g_tp.sram_cell.Vdd;
- }
- else
- {
- Vpp = g_tp.peri_global.Vdd;
- }
-
- // first check whether a decoder is required at all
- rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
- c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
- c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
- drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
-// cout<<"w_dec_n["<<0<<"] = "<blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
-{
- driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
- drv1->power_nand3_path.readOp.leakage +
- drv2->power_nand2_path.readOp.leakage +
- drv2->power_nand3_path.readOp.leakage;
- block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
- blk1->power_nand3_path.readOp.leakage +
- blk1->power_L2.readOp.leakage +
- blk2->power_nand2_path.readOp.leakage +
- blk2->power_nand3_path.readOp.leakage +
- blk2->power_L2.readOp.leakage;
-
- driver_power.readOp.power_gated_leakage = drv1->power_nand2_path.readOp.power_gated_leakage +
- drv1->power_nand3_path.readOp.power_gated_leakage +
- drv2->power_nand2_path.readOp.power_gated_leakage +
- drv2->power_nand3_path.readOp.power_gated_leakage;
- block_power.readOp.power_gated_leakage = blk1->power_nand2_path.readOp.power_gated_leakage +
- blk1->power_nand3_path.readOp.power_gated_leakage +
- blk1->power_L2.readOp.power_gated_leakage +
- blk2->power_nand2_path.readOp.power_gated_leakage +
- blk2->power_nand3_path.readOp.power_gated_leakage +
- blk2->power_L2.readOp.power_gated_leakage;
-
- power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
-
- power.readOp.power_gated_leakage = driver_power.readOp.power_gated_leakage + block_power.readOp.power_gated_leakage;
-
- driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
- drv1->power_nand3_path.readOp.gate_leakage +
- drv2->power_nand2_path.readOp.gate_leakage +
- drv2->power_nand3_path.readOp.gate_leakage;
- block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
- blk1->power_nand3_path.readOp.gate_leakage +
- blk1->power_L2.readOp.gate_leakage +
- blk2->power_nand2_path.readOp.gate_leakage +
- blk2->power_nand3_path.readOp.gate_leakage +
- blk2->power_L2.readOp.gate_leakage;
- power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
-}
-
-void PredecBlkDrv::leakage_feedback(double temperature)
-{
- double leak_nand2_path = 0;
- double leak_nand3_path = 0;
- double gate_leak_nand2_path = 0;
- double gate_leak_nand3_path = 0;
-
- if (flag_driver_exists)
- { // first check whether a predecoder block driver is needed
- for (int i = 0; i < number_gates_nand2_path; ++i)
- {
- leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
- }
- leak_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
- gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load);
-
- for (int i = 0; i < number_gates_nand3_path; ++i)
- {
- leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
- gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
- }
- leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
- gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
-
- power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
- power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
- power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
- }
-}
-
-double Predec::compute_delays(double inrisetime)
-{
- // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
- pair tmp_pair1, tmp_pair2;
- tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
- tmp_pair1 = blk1->compute_delays(tmp_pair1);
- tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
- tmp_pair2 = blk2->compute_delays(tmp_pair2);
- tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
-
- driver_power.readOp.dynamic =
- drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
- drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
- drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
- drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
-
- block_power.readOp.dynamic =
- blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk1->power_L2.readOp.dynamic +
- blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path +
- blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path +
- blk2->power_L2.readOp.dynamic;
-
- power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
-
- delay = tmp_pair1.first;
- return tmp_pair1.second;
-}
-
-
-void Predec::leakage_feedback(double temperature)
-{
- drv1->leakage_feedback(temperature);
- drv2->leakage_feedback(temperature);
- blk1->leakage_feedback(temperature);
- blk2->leakage_feedback(temperature);
-
- driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
- drv1->power_nand3_path.readOp.leakage +
- drv2->power_nand2_path.readOp.leakage +
- drv2->power_nand3_path.readOp.leakage;
- block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
- blk1->power_nand3_path.readOp.leakage +
- blk1->power_L2.readOp.leakage +
- blk2->power_nand2_path.readOp.leakage +
- blk2->power_nand3_path.readOp.leakage +
- blk2->power_L2.readOp.leakage;
- power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
-
- driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
- drv1->power_nand3_path.readOp.gate_leakage +
- drv2->power_nand2_path.readOp.gate_leakage +
- drv2->power_nand3_path.readOp.gate_leakage;
- block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
- blk1->power_nand3_path.readOp.gate_leakage +
- blk1->power_L2.readOp.gate_leakage +
- blk2->power_nand2_path.readOp.gate_leakage +
- blk2->power_nand3_path.readOp.gate_leakage +
- blk2->power_L2.readOp.gate_leakage;
- power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
-}
-
-// returns
-pair Predec::get_max_delay_before_decoder(
- pair input_pair1,
- pair input_pair2)
-{
- pair ret_val;
- double delay;
-
- delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
- ret_val.first = delay;
- ret_val.second = input_pair1.first;
- delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair1.second;
- }
- delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair2.first;
- }
- delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
- if (ret_val.first < delay)
- {
- ret_val.first = delay;
- ret_val.second = input_pair2.second;
- }
-
- return ret_val;
-}
-
-
-
-Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram, bool power_gating_, int nodes_DSTN_)
-:number_gates(0),
- min_number_gates(2),
- c_gate_load(c_gate_load_),
- c_wire_load(c_wire_load_),
- r_wire_load(r_wire_load_),
- delay(0),
-// power(),
- is_dram_(is_dram),
- total_driver_nwidth(0),
- total_driver_pwidth(0),
- power_gating(power_gating_),
- nodes_DSTN(nodes_DSTN_),
- sleeptx(NULL)
-{
- for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
- {
- width_n[i] = 0;
- width_p[i] = 0;
- }
-
- compute_widths();
- compute_area();
-}
-
-
-void Driver::compute_widths()
-{
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
- double c_load = c_gate_load + c_wire_load;
- width_n[0] = g_tp.min_w_nmos_;
- width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
-
- double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
- number_gates = logical_effort(
- min_number_gates,
- 1,
- F,
- width_n,
- width_p,
- c_load,
- p_to_n_sz_ratio,
- is_dram_, false,
- g_tp.max_w_nmos_);
-}
-
-void Driver::compute_area()
-{
- double cumulative_area = 0;
-
- area.h = g_tp.cell_h_def;
- for (int i = 0; i < number_gates; i++)
- {
- cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h);
-
- }
- area.w = (cumulative_area / area.h);
- if (power_gating)
- {
- compute_power_gating();
- cumulative_area += sleeptx->area.get_area();
- area.w = (cumulative_area / area.h);
- }
-}
-
-void Driver::compute_power_gating()
-{
- //For all driver chains there is only one sleep transistors to save area
- //Total transistor width for sleep tx calculation
- for (int i = 0; i power_gating)
- sleeptx = new Sleep_tx (g_ip->perfloss,
- Isat_subarray,
- is_footer,
- c_wakeup,
- detalV,
- nodes_DSTN,//default is 1 for drivers
- area);
-}
-
-
-double Driver::compute_delay(double inrisetime)
-{
- int i;
- double rd, c_load, c_intrinsic, tf;
- double this_delay = 0;
-
- for (i = 0; i < number_gates - 1; ++i)
- {
- rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
- c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- inrisetime = this_delay / (1.0 - 0.5);
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
- power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vcc_min;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
- }
-
- i = number_gates - 1;
- c_load = c_gate_load + c_wire_load;
- rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
- c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
- drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
- tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay += this_delay;
- power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
- power.readOp.power_gated_leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vcc_min;
- power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
-
- return this_delay / (1.0 - 0.5);
-}
-
-//TODO: add sleep tx in predec/predecblk/predecdriver
diff --git a/cacti/decoder.h b/cacti/decoder.h
deleted file mode 100644
index 83aefb9..0000000
--- a/cacti/decoder.h
+++ /dev/null
@@ -1,281 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-#ifndef __DECODER_H__
-#define __DECODER_H__
-
-#include "area.h"
-#include "component.h"
-#include "parameter.h"
-#include "powergating.h"
-#include
-
-using namespace std;
-
-
-class Decoder : public Component
-{
- public:
- Decoder(
- int _num_dec_signals,
- bool flag_way_select,
- double _C_ld_dec_out,
- double _R_wire_dec_out,
- bool fully_assoc_,
- bool is_dram_,
- bool is_wl_tr_,
- const Area & cell_,
- bool power_gating_ = false,
- int nodes_DSTN_ = 1);
-
- bool exist;
- int num_in_signals;
- double C_ld_dec_out;
- double R_wire_dec_out;
- int num_gates;
- int num_gates_min;
- double w_dec_n[MAX_NUMBER_GATES_STAGE];
- double w_dec_p[MAX_NUMBER_GATES_STAGE];
- double delay;
- //powerDef power;
- bool fully_assoc;
- bool is_dram;
- bool is_wl_tr;
-
- double total_driver_nwidth;
- double total_driver_pwidth;
- Sleep_tx * sleeptx;
-
- const Area & cell;
- int nodes_DSTN;
- bool power_gating;
-
- void compute_widths();
- void compute_area();
- double compute_delays(double inrisetime); // return outrisetime
- void compute_power_gating();
-
- void leakage_feedback(double temperature);
-
- ~Decoder()
- {
- if (sleeptx !=0)
- delete sleeptx;
- };
-};
-
-
-
-class PredecBlk : public Component
-{
- public:
- PredecBlk(
- int num_dec_signals,
- Decoder * dec,
- double C_wire_predec_blk_out,
- double R_wire_predec_blk_out,
- int num_dec_per_predec,
- bool is_dram_,
- bool is_blk1);
-
- Decoder * dec;
- bool exist;
- int number_input_addr_bits;
- double C_ld_predec_blk_out;
- double R_wire_predec_blk_out;
- int branch_effort_nand2_gate_output;
- int branch_effort_nand3_gate_output;
- bool flag_two_unique_paths;
- int flag_L2_gate;
- int number_inputs_L1_gate;
- int number_gates_L1_nand2_path;
- int number_gates_L1_nand3_path;
- int number_gates_L2;
- int min_number_gates_L1;
- int min_number_gates_L2;
- int num_L1_active_nand2_path;
- int num_L1_active_nand3_path;
- double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
- double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
- double w_L2_n[MAX_NUMBER_GATES_STAGE];
- double w_L2_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
- powerDef power_L2;
-
- bool is_dram_;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
- pair compute_delays(pair inrisetime); //
- // return
-};
-
-
-class PredecBlkDrv : public Component
-{
- public:
- PredecBlkDrv(
- int way_select,
- PredecBlk * blk_,
- bool is_dram);
-
- int flag_driver_exists;
- int number_input_addr_bits;
- int number_gates_nand2_path;
- int number_gates_nand3_path;
- int min_number_gates;
- int num_buffers_driving_1_nand2_load;
- int num_buffers_driving_2_nand2_load;
- int num_buffers_driving_4_nand2_load;
- int num_buffers_driving_2_nand3_load;
- int num_buffers_driving_8_nand3_load;
- int num_buffers_nand3_path;
- double c_load_nand2_path_out;
- double c_load_nand3_path_out;
- double r_load_nand2_path_out;
- double r_load_nand3_path_out;
- double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
- double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
- double delay_nand2_path;
- double delay_nand3_path;
- powerDef power_nand2_path;
- powerDef power_nand3_path;
-
- PredecBlk * blk;
- Decoder * dec;
- bool is_dram_;
- int way_select;
-
- void compute_widths();
- void compute_area();
-
- void leakage_feedback(double temperature);
-
-
- pair compute_delays(
- double inrisetime_nand2_path,
- double inrisetime_nand3_path); // return
-
- inline int num_addr_bits_nand2_path()
- {
- return num_buffers_driving_1_nand2_load +
- num_buffers_driving_2_nand2_load +
- num_buffers_driving_4_nand2_load;
- }
- inline int num_addr_bits_nand3_path()
- {
- return num_buffers_driving_2_nand3_load +
- num_buffers_driving_8_nand3_load;
- }
- double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
-};
-
-
-
-class Predec : public Component
-{
- public:
- Predec(
- PredecBlkDrv * drv1,
- PredecBlkDrv * drv2);
-
- double compute_delays(double inrisetime); // return outrisetime
-
- void leakage_feedback(double temperature);
- PredecBlk * blk1;
- PredecBlk * blk2;
- PredecBlkDrv * drv1;
- PredecBlkDrv * drv2;
-
- powerDef block_power;
- powerDef driver_power;
-
- private:
- // returns
- pair get_max_delay_before_decoder(
- pair input_pair1,
- pair input_pair2);
-};
-
-
-
-class Driver : public Component
-{
- public:
- Driver(double c_gate_load_, double c_wire_load_,
- double r_wire_load_, bool is_dram,
- bool power_gating_ = false,
- int nodes_DSTN_ = 1 );
-
- int number_gates;
- int min_number_gates;
- double width_n[MAX_NUMBER_GATES_STAGE];
- double width_p[MAX_NUMBER_GATES_STAGE];
- double c_gate_load;
- double c_wire_load;
- double r_wire_load;
- double delay;
-// powerDef power;
- bool is_dram_;
-
- double total_driver_nwidth;
- double total_driver_pwidth;
- Sleep_tx * sleeptx;
-
- int nodes_DSTN;
- bool power_gating;
-
- void compute_widths();
- void compute_area();
- double compute_delay(double inrisetime);
-
- void compute_power_gating();
-
- ~Driver()
- {
- if (sleeptx !=0)
- delete sleeptx;
- };
-};
-
-
-#endif
diff --git a/cacti/htree2.cc b/cacti/htree2.cc
deleted file mode 100644
index 5d71c93..0000000
--- a/cacti/htree2.cc
+++ /dev/null
@@ -1,659 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include "htree2.h"
-#include "wire.h"
-#include
-#include
-
-Htree2::Htree2(
- enum Wire_type wire_model, double mat_w, double mat_h,
- int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
- bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
- :in_rise_time(0), out_rise_time(0),
- tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
- add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
- search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
- uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
-{
- assert(ndbl >= 2 && ndwl >= 2);
-
-// if (ndbl == 1 && ndwl == 1)
-// {
-// delay = 0;
-// power.readOp.dynamic = 0;
-// power.readOp.leakage = 0;
-// area.w = mat_w;
-// area.h = mat_h;
-// return;
-// }
-// if (ndwl == 1) ndwl++;
-// if (ndbl == 1) ndbl++;
-
- max_unpipelined_link_delay = 0; //TODO
- min_w_nmos = g_tp.min_w_nmos_;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
-
- switch (htree_type)
- {
- case Add_htree:
- wire_bw = init_wire_bw = add_bits;
- in_htree();
- break;
- case Data_in_htree:
- wire_bw = init_wire_bw = data_in_bits;
- in_htree();
- break;
- case Data_out_htree:
- wire_bw = init_wire_bw = data_out_bits;
- out_htree();
- break;
- case Search_in_htree:
- wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not.
- in_htree();
- break;
- case Search_out_htree:
- wire_bw = init_wire_bw = search_data_out_bits;
- out_htree();
- break;
- default:
- assert(0);
- break;
- }
-
- power_bit = power;
- power.readOp.dynamic *= init_wire_bw;
-
- assert(power.readOp.dynamic >= 0);
- assert(power.readOp.leakage >= 0);
-}
-
-
-
-// nand gate sizing calculation
-void Htree2::input_nand(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of a repeater = input capacitance of nand.
- double nsize = s1*(1 + pton_size)/(2 + pton_size);
- nsize = (nsize < 1) ? 1 : nsize;
-
- double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
- (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
- 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
- delay+= horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
-
- power.searchOp.dynamic += 0.5 *
- (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd * wire_bw ;
- power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
- power.readOp.power_gated_leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vcc_min;
- power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
-}
-
-
-
-// tristate buffer model consisting of not, nand, nor, and driver transistors
-void Htree2::output_buffer(double s1, double s2, double l_eff)
-{
- Wire w1(wt, l_eff);
- double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
- // input capacitance of repeater = input capacitance of nand + nor.
- double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
- double s_eff = //stage eff of a repeater in a wire
- (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
- gate_C(s2*(min_w_nmos + min_w_pmos), 0);
- double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
- size = (size < 1) ? 1 : size;
-
- double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
- double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
- double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
- gate_C(tr_size*min_w_pmos, 0);
- double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
- gate_C(s1*(min_w_nmos + min_w_pmos), 0);
-
- double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
-
-
- delay += horowitz (w1.out_rise_time, tc,
- deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
-
- //nand
- power.readOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
-
- power.searchOp.dynamic += 0.5 *
- (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(tr_size*(min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //not
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
-
- power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //nor
- power.readOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
-
- power.searchOp.dynamic += 0.5 *
- (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
- +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- //output transistor
- power.readOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd;
-
- power.searchOp.dynamic += 0.5 *
- ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
- +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
- + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
- deviceType->Vdd * deviceType->Vdd*init_wire_bw;
-
- if(uca_tree) {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vcc_min*wire_bw;/*inverter + output tr*/
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vcc_min*wire_bw;//nand
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vcc_min*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=;
- }
- else {
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
-
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vcc_min*wire_bw;/*inverter + output tr*/
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vcc_min*wire_bw;//nand
- power.readOp.power_gated_leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vcc_min*wire_bw;//nor
-
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
- power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
- //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
- }
-}
-
-
-
-/* calculates the input h-tree delay/power
- * A nand gate is used at each node to
- * limit the signal
- * The area of an unbalanced htree (rows != columns)
- * depends on how data is traversed.
- * In the following function, if ( no. of rows < no. of columns),
- * then data first traverse in excess hor. links until vertical
- * and horizontal nodes are same.
- * If no. of rows is bigger, then data traverse in
- * a hor. link followed by a ver. link in a repeated
- * fashion (similar to a balanced tree) until there are no
- * hor. links left. After this it goes through the remaining vertical
- * links.
- */
- void
-Htree2::in_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2); // horizontal nodes
- int v = (int) _log2(ndbl/2); // vertical nodes
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {//Sheng: this computation do not consider the wires that route from edge to middle.
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
- }
- }
-
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.readOp.power_gated_leakage = 0;
- power.searchOp.dynamic =0;
- len = len_temp;
- ht = ht_temp/2;
-
- while (v > 0 || h > 0)
- {
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
-
- if (h > v)
- {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0)
- {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else
- {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // ver
- wtemp2 = new Wire(wt, ht/2); // hor
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
-
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.power_gated_leakage += wtemp1->power.readOp.power_gated_leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- if ((uca_tree == false && option == 2) || search_tree==true)
- {
- wire_bw*=2; // wire bandwidth doubles only for vertical branches
- }
-
- if (uca_tree == false)
- {
- if (len_temp > wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
-
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- input_nand(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.power_gated_leakage += wtemp2->power.readOp.power_gated_leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
-
- if (uca_tree)
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- input_nand(s2, s3, l_eff);
- }
- }
-
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
-}
-
-
-
-/* a tristate buffer is used to handle fan-ins
- * The area of an unbalanced htree (rows != columns)
- * depends on how data is traversed.
- * In the following function, if ( no. of rows < no. of columns),
- * then data first traverse in excess hor. links until vertical
- * and horizontal nodes are same.
- * If no. of rows is bigger, then data traverse in
- * a hor. link followed by a ver. link in a repeated
- * fashion (similar to a balanced tree) until there are no
- * hor. links left. After this it goes through the remaining vertical
- * links.
- */
-void Htree2::out_htree()
-{
- //temp var
- double s1 = 0, s2 = 0, s3 = 0;
- double l_eff = 0;
- Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
- double len = 0, ht = 0;
- int option = 0;
-
- int h = (int) _log2(ndwl/2);
- int v = (int) _log2(ndbl/2);
- double len_temp;
- double ht_temp;
- if (uca_tree)
- {
- ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,h))))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
- 2 * (1-pow(0.5,v))))/2;
- }
- else
- {
- if (ndwl == ndbl) {
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
-
- }
- else if (ndwl > ndbl) {
- double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
- (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
- }
- else {
- double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
- ht_temp = ((mat_height*ndbl/2) +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
- )/2;
- len_temp = (mat_width*ndwl/2 +
- ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
- (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
- }
- }
- area.h = ht_temp * 2;
- area.w = len_temp * 2;
- delay = 0;
- power.readOp.dynamic = 0;
- power.readOp.leakage = 0;
- power.readOp.power_gated_leakage = 0;
- power.readOp.gate_leakage = 0;
- //cout<<"power.readOp.gate_leakage"< 0 || h > 0)
- { //finds delay/power of each link in the tree
- if (wtemp1) delete wtemp1;
- if (wtemp2) delete wtemp2;
- if (wtemp3) delete wtemp3;
-
- if(h > v) {
- //the iteration considers only one horizontal link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, len/2); // ver
- len_temp = len;
- len /= 2;
- wtemp3 = 0;
- h--;
- option = 0;
- }
- else if (v>0 && h>0) {
- //considers one horizontal link and one vertical link
- wtemp1 = new Wire(wt, len); // hor
- wtemp2 = new Wire(wt, ht); // ver
- wtemp3 = new Wire(wt, len/2); // next hor
- len_temp = len;
- ht_temp = ht;
- len /= 2;
- ht /= 2;
- v--;
- h--;
- option = 1;
- }
- else {
- // considers only one vertical link
- assert(h == 0);
- wtemp1 = new Wire(wt, ht); // hor
- wtemp2 = new Wire(wt, ht/2); // ver
- ht_temp = ht;
- ht /= 2;
- wtemp3 = 0;
- v--;
- option = 2;
- }
- delay += wtemp1->delay;
- power.readOp.dynamic += wtemp1->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
- power.readOp.power_gated_leakage += wtemp1->power.readOp.power_gated_leakage*wire_bw;
- power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"< wtemp1->repeater_spacing)
- {
- s1 = wtemp1->repeater_size;
- l_eff = wtemp1->repeater_spacing;
- }
- else
- {
- s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
- l_eff = len_temp;
- }
- if (ht_temp > wtemp2->repeater_spacing)
- {
- s2 = wtemp2->repeater_size;
- }
- else
- {
- s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
- }
- // first level
- output_buffer(s1, s2, l_eff);
- }
-
-
- if (option != 1)
- {
- continue;
- }
-
- // second level
- delay += wtemp2->delay;
- power.readOp.dynamic += wtemp2->power.readOp.dynamic;
- power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
- power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
- power.readOp.power_gated_leakage += wtemp2->power.readOp.power_gated_leakage*wire_bw;
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- //cout<<"power.readOp.gate_leakage"<power.readOp.leakage*wire_bw);
- power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- }
- else
- {
- power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
- power.readOp.power_gated_leakage += (wtemp2->power.readOp.power_gated_leakage*wire_bw);
- power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
- wire_bw*=2;
-
- if (ht_temp > wtemp3->repeater_spacing)
- {
- s3 = wtemp3->repeater_size;
- l_eff = wtemp3->repeater_spacing;
- }
- else
- {
- s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
- l_eff = ht_temp;
- }
-
- output_buffer(s2, s3, l_eff);
- }
- //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage<
-#include
-#include
-#include
-
-#include "io.h"
-#include "area.h"
-#include "basic_circuit.h"
-#include "parameter.h"
-#include "Ucache.h"
-#include "nuca.h"
-#include "crossbar.h"
-#include "arbiter.h"
-#include "version_cacti.h"
-//#include "highradix.h"
-
-using namespace std;
-
-
-InputParameter::InputParameter()
-: array_power_gated(false),
- bitline_floating(false),
- wl_power_gated(false),
- cl_power_gated(false),
- interconect_power_gated(false),
- power_gating(false),
- perfloss(0.01),
- cl_vertical (true),
- long_channel_device(false)
-{
- dvs_voltage = std::vector(0);
-}
-
-/* Parses "cache.cfg" file */
- void
-InputParameter::parse_cfg(const string & in_file)
-{
- FILE *fp = fopen(in_file.c_str(), "r");
- char line[5000];
- char jk[5000];
- char temp_var[5000];
- double temp_double;
- char *data = line;
- int offset= 0;
-
- if(!fp) {
- cout << in_file << " is missing!\n";
- exit(-1);
- }
-
- while(fscanf(fp, "%[^\n]\n", line) != EOF) {
-
- if (!strncmp("-size", line, strlen("-size"))) {
- sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
- continue;
- }
-
- if (!strncmp("-page size", line, strlen("-page size"))) {
- sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
- continue;
- }
-
- if (!strncmp("-burst length", line, strlen("-burst length"))) {
- sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
- continue;
- }
-
- if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
- sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
- continue;
- }
-
- if (!strncmp("-block", line, strlen("-block"))) {
- sscanf(line, "-block size (bytes) %d", &(line_sz));
- continue;
- }
-
- if (!strncmp("-associativity", line, strlen("-associativity"))) {
- sscanf(line, "-associativity %d", &(assoc));
- continue;
- }
-
- if (!strncmp("-read-write", line, strlen("-read-write"))) {
- sscanf(line, "-read-write port %d", &(num_rw_ports));
- continue;
- }
-
- if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
- sscanf(line, "-exclusive read port %d", &(num_rd_ports));
- continue;
- }
-
- if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
- sscanf(line, "-exclusive write port %d", &(num_wr_ports));
- continue;
- }
-
- if (!strncmp("-single ended", line, strlen("-single ended"))) {
- sscanf(line, "-single %[(:-~)*]%d", jk,
- &(num_se_rd_ports));
- continue;
- }
-
- if (!strncmp("-search", line, strlen("-search"))) {
- sscanf(line, "-search port %d", &(num_search_ports));
- continue;
- }
-
- if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
- sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
- continue;
- }
-
- if (!strncmp("-technology", line, strlen("-technology"))) {
- sscanf(line, "-technology (u) %lf", &(F_sz_um));
- F_sz_nm = F_sz_um*1000;
- continue;
- }
-
- if (!strncmp("-hp Vdd", line, strlen("-hp Vdd"))) {
- sscanf(line, "-hp Vdd%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_hp_vdd = false;
- hp_Vdd = 1.0; /*
- * if this is by default, then the vdd value in g_ip here does not matter
- */
- }
- else {
- specific_hp_vdd = true;
- sscanf(line, "-hp Vdd (V) %lf", &(hp_Vdd));
- }
- continue;
- }
-
- if (!strncmp("-lstp Vdd", line, strlen("-lstp Vdd"))) {
- sscanf(line, "-lstp Vdd%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_lstp_vdd = false;
- lstp_Vdd = 1.0; /*
- * if this is by default, then the vdd value in g_ip here does not matter
- */
- }
- else {
- specific_lstp_vdd = true;
- sscanf(line, "-lstp Vdd (V) %lf", &(lstp_Vdd));
- }
- continue;
- }
-
- if (!strncmp("-lop Vdd", line, strlen("-lop Vdd"))) {
- sscanf(line, "-lop Vdd%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_lop_vdd = false;
- lop_Vdd = 1.0; /*
- * if this is by default, then the vdd value in g_ip here does not matter
- */
- }
- else {
- specific_lop_vdd = true;
- sscanf(line, "-lop Vdd (V) %lf", &(lop_Vdd));
- }
- continue;
- }
-
- if (!strncmp("-DVS(V):", line, strlen("-DVS(V):"))) {
- memmove (line,line+9,strlen(line));
- while (1 == sscanf(data, "%lf%n", &temp_double, &offset)) {
- data += offset;
- dvs_voltage.push_back(temp_double);
- }
-// dvs_levels = dvs_voltage.size();
- continue;
- }
-
- if (!strncmp("-Powergating voltage", line, strlen("-Powergating voltage"))) {
- sscanf(line, "-Powergating voltage%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_vcc_min= false;
- user_defined_vcc_min = 1.0; /*
- * if this is by default, then the vdd value in g_ip here does not matter
- */
- }
- else {
- specific_vcc_min = true;
- sscanf(line, "-Powergating voltage (V) %lf", &(user_defined_vcc_min));
- }
- continue;
- }
-
-
- if (!strncmp("-output/input", line, strlen("-output/input"))) {
- sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
- continue;
- }
-
- if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
- sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
- continue;
- }
-
- if (!strncmp("-cache type", line, strlen("-cache type"))) {
- sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("cache", temp_var, sizeof("cache"))) {
- is_cache = true;
- }
- else
- {
- is_cache = false;
- }
-
- if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
- is_main_mem = true;
- }
- else {
- is_main_mem = false;
- }
-
- if (!strncmp("cam", temp_var, sizeof("cam"))) {
- pure_cam = true;
- }
- else {
- pure_cam = false;
- }
-
- if (!strncmp("ram", temp_var, sizeof("ram"))) {
- pure_ram = true;
- }
- else {
- if (!is_main_mem)
- pure_ram = false;
- else
- pure_ram = true;
- }
-
- continue;
- }
-
-
- if (!strncmp("-tag size", line, strlen("-tag size"))) {
- sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("default", temp_var, sizeof("default"))) {
- specific_tag = false;
- tag_w = 42; /* the acutal value is calculated
- * later based on the cache size, bank count, and associativity
- */
- }
- else {
- specific_tag = true;
- sscanf(line, "-tag size (b) %d", &(tag_w));
- }
- continue;
- }
-
- if (!strncmp("-access mode", line, strlen("-access mode"))) {
- sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("fast", temp_var, strlen("fast"))) {
- access_mode = 2;
- }
- else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
- access_mode = 1;
- }
- else if(!strncmp("normal", temp_var, strlen("normal"))) {
- access_mode = 0;
- }
- else {
- cout << "ERROR: Invalid access mode!\n";
- exit(0);
- }
- continue;
- }
-
- if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) {
- sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- data_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- data_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
-
- if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
- sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- data_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- data_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- data_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
-
- if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
- sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_ram_cell_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_ram_cell_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_ram_cell_tech_type = 2;
- }
- else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
- tag_arr_ram_cell_tech_type = 3;
- }
- else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
- tag_arr_ram_cell_tech_type = 4;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
-
- if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
- sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
- tag_arr_peri_global_tech_type = 0;
- }
- else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
- tag_arr_peri_global_tech_type = 1;
- }
- else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
- tag_arr_peri_global_tech_type = 2;
- }
- else {
- cout << "ERROR: Invalid type!\n";
- exit(0);
- }
- continue;
- }
- if(!strncmp("-design", line, strlen("-design"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt), &(dynamic_power_wt),
- &(leakage_power_wt),
- &(cycle_time_wt), &(area_wt));
- continue;
- }
-
- if(!strncmp("-deviate", line, strlen("-deviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev), &(dynamic_power_dev),
- &(leakage_power_dev),
- &(cycle_time_dev), &(area_dev));
- continue;
- }
-
- if(!strncmp("-Optimize", line, strlen("-Optimize"))) {
- sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if(!strncmp("ED^2", temp_var, strlen("ED^2"))) {
- ed = 2;
- }
- else if(!strncmp("ED", temp_var, strlen("ED"))) {
- ed = 1;
- }
- else {
- ed = 0;
- }
- }
-
- if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_wt_nuca), &(dynamic_power_wt_nuca),
- &(leakage_power_wt_nuca),
- &(cycle_time_wt_nuca), &(area_wt_nuca));
- continue;
- }
-
- if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
- sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
- &(delay_dev_nuca), &(dynamic_power_dev_nuca),
- &(leakage_power_dev_nuca),
- &(cycle_time_dev_nuca), &(area_dev_nuca));
- continue;
- }
-
- if(!strncmp("-Cache model", line, strlen("-cache model"))) {
- sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("UCA", temp_var, strlen("UCA"))) {
- nuca = 0;
- }
- else {
- nuca = 1;
- }
- continue;
- }
-
- if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
- sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
-
- if (nuca_bank_count != 0) {
- force_nuca_bank = 1;
- }
- continue;
- }
-
- if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_is_mat_type = 2;
- continue;
- }
- else if (!strncmp("local", temp_var, strlen("local"))) {
- wire_is_mat_type = 0;
- continue;
- }
- else {
- wire_is_mat_type = 1;
- continue;
- }
- }
-
- if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("global", temp_var, strlen("global"))) {
- wire_os_mat_type = 2;
- }
- else {
- wire_os_mat_type = 1;
- }
- continue;
- }
-
- if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
- sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
- ic_proj_type = 0;
- }
- else {
- ic_proj_type = 1;
- }
- continue;
- }
-
- if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
- sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
-
- if (!strncmp("default", temp_var, strlen("default"))) {
- force_wiretype = 0;
- wt = Global;
- }
- else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
- force_wiretype = 1;
- wt = Global_10;
- }
- else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
- force_wiretype = 1;
- wt = Global_20;
- }
- else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
- force_wiretype = 1;
- wt = Global_30;
- }
- else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
- force_wiretype = 1;
- wt = Global_5;
- }
- else if (!(strncmp("Global", temp_var, strlen("Global")))) {
- force_wiretype = 1;
- wt = Global;
- }
- else {
- wt = Low_swing;
- force_wiretype = 1;
- }
- continue;
- }
-
-
-
- if(!strncmp("-Core", line, strlen("-Core"))) {
- sscanf(line, "-Core count %d\n", &(cores));
- if (cores > 16) {
- printf("No. of cores should be less than 16!\n");
- }
- continue;
- }
-
- if(!strncmp("-Cache level", line, strlen("-Cache level"))) {
- sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("L2", temp_var, strlen("L2"))) {
- cache_level = 0;
- }
- else {
- cache_level = 1;
- }
- }
-
- if(!strncmp("-Print level", line, strlen("-Print level"))) {
- sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
- print_detail = 1;
- }
- else {
- print_detail = 0;
- }
-
- }
- if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
- sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- add_ecc_b_ = true;
- }
- else {
- add_ecc_b_ = false;
- }
- }
-
- if(!strncmp("-CLDriver vertical", line, strlen("-CLDriver vertical"))) {
- sscanf(line, "-CLDriver vertical %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- cl_vertical = true;
- }
- else {
- cl_vertical = false;
- }
- }
-
- if(!strncmp("-Array Power Gating", line, strlen("-Array Power Gating"))) {
- sscanf(line, "-Array Power Gating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- array_power_gated = true;
- }
- else {
- array_power_gated = false;
- }
- }
-
- if(!strncmp("-Bitline floating", line, strlen("-Bitline floating"))) {
- sscanf(line, "-Bitline floating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- bitline_floating = true;
- }
- else {
- bitline_floating = false;
- }
- }
-
- if(!strncmp("-WL Power Gating", line, strlen("-WL Power Gating"))) {
- sscanf(line, "-WL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- wl_power_gated = true;
- }
- else {
- wl_power_gated = false;
- }
- }
-
- if(!strncmp("-CL Power Gating", line, strlen("-CL Power Gating"))) {
- sscanf(line, "-CL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- cl_power_gated = true;
- }
- else {
- cl_power_gated = false;
- }
- }
-
- if(!strncmp("-Interconnect Power Gating", line, strlen("-Interconnect Power Gating"))) {
- sscanf(line, "-Interconnect Power Gating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- interconect_power_gated = true;
- }
- else {
- interconect_power_gated = false;
- }
- }
-
- if(!strncmp("-Power Gating Performance Loss", line, strlen("-Power Gating Performance Loss"))) {
- sscanf(line, "-Power Gating Performance Loss %lf", &(perfloss));
- continue;
- }
-
- if(!strncmp("-Power Gating", line, strlen("-Power Gating"))) {
- sscanf(line, "-Power Gating %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- power_gating = true;
- }
- else {
- power_gating = false;
- }
- }
-
- if(!strncmp("-Long channel devices", line, strlen("-Long channel devices"))) {
- sscanf(line, "-Long channel devices %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- long_channel_device = true;
- }
- else {
- long_channel_device = false;
- }
- }
-
- if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
- sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- print_input_args = true;
- }
- else {
- print_input_args = false;
- }
- }
-
- if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
- sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
- if (!strncmp("true", temp_var, strlen("true"))) {
- force_cache_config = true;
- }
- else {
- force_cache_config = false;
- }
- }
-
- if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
- sscanf(line, "-Ndbl %d\n", &(ndbl));
- continue;
- }
- if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
- sscanf(line, "-Ndwl %d\n", &(ndwl));
- continue;
- }
- if(!strncmp("-Nspd", line, strlen("-Nspd"))) {
- sscanf(line, "-Nspd %d\n", &(nspd));
- continue;
- }
- if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
- sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
- continue;
- }
- if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
- sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
- continue;
- }
- if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
- sscanf(line, "-Ndcm %d\n", &(ndcm));
- continue;
- }
-
- }
- rpters_in_htree = true;
- fclose(fp);
-}
-
- void
-InputParameter::display_ip()
-{
- cout << "Cache size : " << cache_sz << endl;
- cout << "Block size : " << line_sz << endl;
- cout << "Associativity : " << assoc << endl;
- cout << "Read only ports : " << num_rd_ports << endl;
- cout << "Write only ports : " << num_wr_ports << endl;
- cout << "Read write ports : " << num_rw_ports << endl;
- cout << "Single ended read ports : " << num_se_rd_ports << endl;
- if (fully_assoc||pure_cam)
- {
- cout << "Search ports : " << num_search_ports << endl;
- }
- cout << "Cache banks (UCA) : " << nbanks << endl;
- cout << "Technology : " << F_sz_um << endl;
- cout << "User specified HP Vdd (v)? : " << std::boolalpha << specific_hp_vdd << endl;
- if (specific_hp_vdd)
- {
- cout << "User defined HP Vdd (v) : " << hp_Vdd << endl;
- }
- cout << "User specified LSTP Vdd (v)? : " << std::boolalpha << specific_lstp_vdd << endl;
- if (specific_lstp_vdd)
- {
- cout << "User defined HP Vdd (v) : " << lstp_Vdd << endl;
- }
- cout << "User specified LOP Vdd (v)? : " << std::boolalpha << specific_lop_vdd << endl;
- if (specific_lop_vdd)
- {
- cout << "User defined HP Vdd (v) : " << lop_Vdd << endl;
- }
- cout << "Temperature : " << temp << endl;
- cout << "Tag size : " << tag_w << endl;
- if (is_cache) {
- cout << "array type : " << "Cache" << endl;
- }
- if (pure_ram) {
- cout << "array type : " << "Scratch RAM" << endl;
- }
- if (pure_cam)
- {
- cout << "array type : " << "CAM" << endl;
- }
- cout << "Model as memory : " << is_main_mem << endl;
- cout << "Access mode : " << access_mode << endl;
- cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
- cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
- cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
- cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
- cout << "Optimization target : " << ed << endl;
- cout << "Design objective (UCA wt) : " << delay_wt << " "
- << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
- << " " << area_wt << endl;
- cout << "Design objective (UCA dev) : " << delay_dev << " "
- << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
- << " " << area_dev << endl;
- if (nuca)
- {
- cout << "Cores : " << cores << endl;
-
-
- cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
- << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
- << " " << area_wt_nuca << endl;
- cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
- << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
- << " " << area_dev_nuca << endl;
- }
- cout << "Cache model : " << nuca << endl;
- cout << "Nuca bank : " << nuca_bank_count << endl;
- cout << "Wire inside mat : " << wire_is_mat_type << endl;
- cout << "Wire outside mat : " << wire_os_mat_type << endl;
- cout << "Interconnect projection : " << ic_proj_type << endl;
- cout << "Wire signalling : " << force_wiretype << endl;
- cout << "Print level : " << print_detail << endl;
- cout << "ECC overhead : " << add_ecc_b_ << endl;
- cout << "Page size : " << page_sz_bits << endl;
- cout << "Burst length : " << burst_len << endl;
- cout << "Internal prefetch width : " << int_prefetch_w << endl;
- cout << "Force cache config : " << g_ip->force_cache_config << endl;
- if (g_ip->force_cache_config) {
- cout << "Ndwl : " << g_ip->ndwl << endl;
- cout << "Ndbl : " << g_ip->ndbl << endl;
- cout << "Nspd : " << g_ip->nspd << endl;
- cout << "Ndcm : " << g_ip->ndcm << endl;
- cout << "Ndsam1 : " << g_ip->ndsam1 << endl;
- cout << "Ndsam2 : " << g_ip->ndsam2 << endl;
- }
- // cout << "Placing subarray out driver vertical? : " << g_ip->cl_vertical << endl;
-}
-
-
-
-powerComponents operator+(const powerComponents & x, const powerComponents & y)
-{
- powerComponents z;
-
- z.dynamic = x.dynamic + y.dynamic;
- z.leakage = x.leakage + y.leakage;
- z.gate_leakage = x.gate_leakage + y.gate_leakage;
- z.short_circuit = x.short_circuit + y.short_circuit;
- z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage;
- z.power_gated_leakage = x.power_gated_leakage + y.power_gated_leakage;
- z.power_gated_with_long_channel_leakage = x.power_gated_with_long_channel_leakage + y.power_gated_with_long_channel_leakage;
-
- return z;
-}
-
-powerComponents operator*(const powerComponents & x, double const * const y)
-{
- powerComponents z;
-
- z.dynamic = x.dynamic*y[0];
- z.leakage = x.leakage*y[1];
- z.gate_leakage = x.gate_leakage*y[2];
- z.short_circuit = x.short_circuit*y[3];
- z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage
- z.power_gated_leakage = x.power_gated_leakage*y[1];//power_gated_leakage has the same behavior as normal leakage
- z.power_gated_with_long_channel_leakage = x.power_gated_with_long_channel_leakage*y[1];//power_gated_with_long_channel_leakage has the same behavior as normal leakage
- return z;
-}
-
-
-powerDef operator+(const powerDef & x, const powerDef & y)
-{
- powerDef z;
-
- z.readOp = x.readOp + y.readOp;
- z.writeOp = x.writeOp + y.writeOp;
- z.searchOp = x.searchOp + y.searchOp;
- return z;
-}
-
-powerDef operator*(const powerDef & x, double const * const y)
-{
- powerDef z;
-
- z.readOp = x.readOp*y;
- z.writeOp = x.writeOp*y;
- z.searchOp = x.searchOp*y;
- return z;
-}
-
-uca_org_t cacti_interface(const string & infile_name)
-{
-
- uca_org_t fin_res;
- //uca_org_t result;
- fin_res.valid = false;
-
- g_ip = new InputParameter();
- g_ip->parse_cfg(infile_name);
- if(!g_ip->error_checking())
- exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
-
- init_tech_params(g_ip->F_sz_um, false);//this init is for initializing wires
- Wire winit; // Do not delete this line. It initializes wires.
-// g_tp.peri_global.display();
-// g_tp.sram_cell.display();
-
-
-// For HighRadix Only
-// //// Wire wirea(g_ip->wt, 1000);
-// //// wirea.print_wire();
-// //// cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl;
-// // winit.print_wire();
-// //
-// HighRadix *hr;
-// hr = new HighRadix();
-// hr->compute_power();
-// hr->print_router();
-// exit(0);
-//
-// double sub_switch_sz = 2;
-// double rows = 32;
-// for (int i=0; i<6; i++) {
-// sub_switch_sz = pow(2, i);
-// rows = 64/sub_switch_sz;
-// hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7);
-// hr->compute_power();
-// hr->print_router();
-// delete hr;
-// }
-// // HighRadix yarc;
-// // yarc.compute_power();
-// // yarc.print_router();
-// winit.print_wire();
-// exit(0);
-// For HighRadix Only End
-
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
-
- //g_ip->display_ip();
-
- solve(&fin_res);
-// output_UCA(&fin_res);
-// Wire::print_wire();
- output_data_csv(fin_res);
-
- if (!g_ip->dvs_voltage.empty())
- {
- update_dvs(&fin_res);
- }
- if (g_ip->power_gating)
- {
- update_pg(&fin_res);//this is needed for compute area overhead of power-gating, even the gated power is calculated together un-gated leakage
- }
- output_UCA(&fin_res);
-
- Wire wprint;//reset wires to original configuration as in *.cfg file (dvs level 0)
- Wire::print_wire();
-
- delete (g_ip);
- return fin_res;
-}
-
-//cacti6.5's plain interface, please keep !!!
-uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,
- int excl_write_ports,
- int single_ended_read_ports,
- int banks,
- double tech_node, // in nm
- int page_sz,
- int burst_length,
- int pre_width,
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode, //0 normal, 1 seq, 2 fast
- int cache, //scratch ram or cache
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_area,
- int obj_func_cycle_time,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area,
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in, // 0-4
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
- int wire_inside_mat_type_in,
- int wire_outside_mat_type_in,
- int is_nuca, // 0 - UCA, 1 - NUCA
- int core_count,
- int cache_level, // 0 - L2, 1 - L3
- int nuca_bank_count,
- int nuca_obj_func_delay,
- int nuca_obj_func_dynamic_power,
- int nuca_obj_func_leakage_power,
- int nuca_obj_func_area,
- int nuca_obj_func_cycle_time,
- int nuca_dev_func_delay,
- int nuca_dev_func_dynamic_power,
- int nuca_dev_func_leakage_power,
- int nuca_dev_func_area,
- int nuca_dev_func_cycle_time,
- int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
- int p_input)
-{
- g_ip = new InputParameter();
- g_ip->add_ecc_b_ = true;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = burst_length;
- g_ip->int_prefetch_w = pre_width;
- g_ip->page_sz_bits = page_sz;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (tag_width == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->ed = ed_ed2_none;
-
- switch(wt) {
- case (0):
- g_ip->force_wiretype = 0;
- g_ip->wt = Global;
- break;
- case (1):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global;
- break;
- case (2):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_5;
- break;
- case (3):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_10;
- break;
- case (4):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_20;
- break;
- case (5):
- g_ip->force_wiretype = 1;
- g_ip->wt = Global_30;
- break;
- case (6):
- g_ip->force_wiretype = 1;
- g_ip->wt = Low_swing;
- break;
- default:
- cout << "Unknown wire type!\n";
- exit(0);
- }
-
- g_ip->delay_wt_nuca = nuca_obj_func_delay;
- g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
- g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
- g_ip->area_wt_nuca = nuca_obj_func_area;
- g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
- g_ip->delay_dev_nuca = dev_func_delay;
- g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
- g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
- g_ip->area_dev_nuca = nuca_dev_func_area;
- g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
- g_ip->nuca = is_nuca;
- g_ip->nuca_bank_count = nuca_bank_count;
- if(nuca_bank_count > 0) {
- g_ip->force_nuca_bank = 1;
- }
- g_ip->cores = core_count;
- g_ip->cache_level = cache_level;
-
- g_ip->temp = temp;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache != 0) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- g_ip->wt = Global_5;
- g_ip->force_cache_config = false;
- g_ip->force_wiretype = false;
- g_ip->print_input_args = p_input;
-
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- if (g_ip->error_checking() == false) exit(0);
- if (g_ip->print_input_args)
- g_ip->display_ip();
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
-
- if (g_ip->nuca == 1)
- {
- Nuca n(&g_tp.peri_global);
- n.sim_nuca();
- }
- solve(&fin_res);
-
- output_UCA(&fin_res);
-
- delete (g_ip);
- return fin_res;
-}
-
-//McPAT's plain interface, please keep !!!
-uca_org_t cacti_interface(
- int cache_size,
- int line_size,
- int associativity,
- int rw_ports,
- int excl_read_ports,// para5
- int excl_write_ports,
- int single_ended_read_ports,
- int search_ports,
- int banks,
- double tech_node,//para10
- int output_width,
- int specific_tag,
- int tag_width,
- int access_mode,
- int cache, //para15
- int main_mem,
- int obj_func_delay,
- int obj_func_dynamic_power,
- int obj_func_leakage_power,
- int obj_func_cycle_time, //para20
- int obj_func_area,
- int dev_func_delay,
- int dev_func_dynamic_power,
- int dev_func_leakage_power,
- int dev_func_area, //para25
- int dev_func_cycle_time,
- int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
- int temp,
- int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
- int data_arr_ram_cell_tech_flavor_in,//para30
- int data_arr_peri_global_tech_flavor_in,
- int tag_arr_ram_cell_tech_flavor_in,
- int tag_arr_peri_global_tech_flavor_in,
- int interconnect_projection_type_in,
- int wire_inside_mat_type_in,//para35
- int wire_outside_mat_type_in,
- int REPEATERS_IN_HTREE_SEGMENTS_in,
- int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
- int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
- int PAGE_SIZE_BITS_in,//para40
- int BURST_LENGTH_in,
- int INTERNAL_PREFETCH_WIDTH_in,
- int force_wiretype,
- int wiretype,
- int force_config,//para45
- int ndwl,
- int ndbl,
- int nspd,
- int ndcm,
- int ndsam1,//para50
- int ndsam2,
- int ecc)
-{
- g_ip = new InputParameter();
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
- g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
- g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
- g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-
- g_ip->ic_proj_type = interconnect_projection_type_in;
- g_ip->wire_is_mat_type = wire_inside_mat_type_in;
- g_ip->wire_os_mat_type = wire_outside_mat_type_in;
- g_ip->burst_len = BURST_LENGTH_in;
- g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
- g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-
- g_ip->cache_sz = cache_size;
- g_ip->line_sz = line_size;
- g_ip->assoc = associativity;
- g_ip->nbanks = banks;
- g_ip->out_w = output_width;
- g_ip->specific_tag = specific_tag;
- if (specific_tag == 0) {
- g_ip->tag_w = 42;
- }
- else {
- g_ip->tag_w = tag_width;
- }
-
- g_ip->access_mode = access_mode;
- g_ip->delay_wt = obj_func_delay;
- g_ip->dynamic_power_wt = obj_func_dynamic_power;
- g_ip->leakage_power_wt = obj_func_leakage_power;
- g_ip->area_wt = obj_func_area;
- g_ip->cycle_time_wt = obj_func_cycle_time;
- g_ip->delay_dev = dev_func_delay;
- g_ip->dynamic_power_dev = dev_func_dynamic_power;
- g_ip->leakage_power_dev = dev_func_leakage_power;
- g_ip->area_dev = dev_func_area;
- g_ip->cycle_time_dev = dev_func_cycle_time;
- g_ip->temp = temp;
- g_ip->ed = ed_ed2_none;
-
- g_ip->F_sz_nm = tech_node;
- g_ip->F_sz_um = tech_node / 1000;
- g_ip->is_main_mem = (main_mem != 0) ? true : false;
- g_ip->is_cache = (cache ==1) ? true : false;
- g_ip->pure_ram = (cache ==0) ? true : false;
- g_ip->pure_cam = (cache ==2) ? true : false;
- g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
- g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
- g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-
- g_ip->num_rw_ports = rw_ports;
- g_ip->num_rd_ports = excl_read_ports;
- g_ip->num_wr_ports = excl_write_ports;
- g_ip->num_se_rd_ports = single_ended_read_ports;
- g_ip->num_search_ports = search_ports;
-
- g_ip->print_detail = 1;
- g_ip->nuca = 0;
-
- if (force_wiretype == 0)
- {
- g_ip->wt = Global;
- g_ip->force_wiretype = false;
- }
- else
- { g_ip->force_wiretype = true;
- if (wiretype==10) {
- g_ip->wt = Global_10;
- }
- if (wiretype==20) {
- g_ip->wt = Global_20;
- }
- if (wiretype==30) {
- g_ip->wt = Global_30;
- }
- if (wiretype==5) {
- g_ip->wt = Global_5;
- }
- if (wiretype==0) {
- g_ip->wt = Low_swing;
- }
- }
- //g_ip->wt = Global_5;
- if (force_config == 0)
- {
- g_ip->force_cache_config = false;
- }
- else
- {
- g_ip->force_cache_config = true;
- g_ip->ndbl=ndbl;
- g_ip->ndwl=ndwl;
- g_ip->nspd=nspd;
- g_ip->ndcm=ndcm;
- g_ip->ndsam1=ndsam1;
- g_ip->ndsam2=ndsam2;
-
-
- }
-
- if (ecc==0){
- g_ip->add_ecc_b_=false;
- }
- else
- {
- g_ip->add_ecc_b_=true;
- }
-
-
- if(!g_ip->error_checking())
- exit(0);
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
-
- g_ip->display_ip();
- solve(&fin_res);
- output_UCA(&fin_res);
- output_data_csv(fin_res);
- delete (g_ip);
-
- return fin_res;
-}
-
-
-
-bool InputParameter::error_checking()
-{
- int A;
- bool seq_access = false;
- fast_access = true;
- fully_assoc = false;
-
- switch (access_mode)
- {
- case 0:
- seq_access = false;
- fast_access = false;
- break;
- case 1:
- seq_access = true;
- fast_access = false;
- break;
- case 2:
- seq_access = false;
- fast_access = true;
- break;
- }
-
- if(is_main_mem)
- {
- if(ic_proj_type == 0)
- {
- cerr << "DRAM model supports only conservative interconnect projection!\n\n";
- return false;
- }
- }
-
- uint32_t B = line_sz;
-
- if (B < 1)
- {
-
- cerr << "Block size must >= 1" << endl;
- return false;
- }
- else if (B*8 < out_w)
- {
- cerr << "Block size must be at least " << out_w/8 << endl;
- return false;
- }
-
- if (F_sz_um <= 0)
- {
- cerr << "Feature size must be > 0" << endl;
- return false;
- }
- else if (F_sz_um > 0.181)
- {
- cerr << "Feature size must be <= 180 nm" << endl;
- return false;
- }else if (F_sz_um >0.091 && (data_arr_ram_cell_tech_type!= itrs_hp
- || tag_arr_ram_cell_tech_type!= itrs_hp
- || data_arr_peri_global_tech_type != itrs_hp
- ||tag_arr_peri_global_tech_type != itrs_hp))
- {
- cerr << "Feature size from 90nm to 180 nm only support the ITRS HP device type" << endl;
- return false;
- }
-
- uint32_t RWP = num_rw_ports;
- uint32_t ERP = num_rd_ports;
- uint32_t EWP = num_wr_ports;
- uint32_t NSER = num_se_rd_ports;
- uint32_t SCHP = num_search_ports;
-
-//TODO: revisit this. This is an important feature. Sheng thought this should be used
-// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
-// // the number of banks, we assume that the multiple ports are implemented via the multiple banks.
-// // In such a case we assume that each bank has 1 RWP port.
-// if ((RWP + ERP + EWP) <= nbanks && nbanks>1)
-// {
-// RWP = 1;
-// ERP = 0;
-// EWP = 0;
-// NSER = 0;
-// }
-// else if ((RWP < 0) || (EWP < 0) || (ERP < 0))
-// {
-// cerr << "Ports must >=0" << endl;
-// return false;
-// }
-// else if (RWP > 2)
-// {
-// cerr << "Maximum of 2 read/write ports" << endl;
-// return false;
-// }
-// else if ((RWP+ERP+EWP) < 1)
- // Changed to new implementation:
- // The number of ports specified at input is per bank
- if ((RWP+ERP+EWP) < 1)
- {
- cerr << "Must have at least one port" << endl;
- return false;
- }
-
- if (is_pow2(nbanks) == false)
- {
- cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl;
- return false;
- }
-
- int C = cache_sz/nbanks;
- if (C < 64)
- {
- cerr << "Cache size must >=64" << endl;
- return false;
- }
-
-//TODO: revisit this
-// if (pure_ram==true && assoc!=1)
-// {
-// cerr << "Pure RAM must have assoc as 1" << endl;
-// return false;
-// }
-
- //fully assoc and cam check
- if (is_cache && assoc==0)
- fully_assoc =true;
- else
- fully_assoc = false;
-
- if (pure_cam==true && assoc!=0)
- {
- cerr << "Pure CAM must have associativity as 0" << endl;
- return false;
- }
-
- if (assoc==0 && (pure_cam==false && is_cache ==false))
- {
- cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl;
- return false;
- }
-
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type
- || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type ))
- {
- cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl;
- return false;
- }
-
- if ((fully_assoc==true || pure_cam==true)
- && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram))
- {
- cerr << "DRAM based CAM and fully associative cache are not supported" << endl;
- return false;
- }
-
- if ((fully_assoc==true || pure_cam==true)
- && (is_main_mem==true))
- {
- cerr << "CAM and fully associative cache cannot be as main memory" << endl;
- return false;
- }
-
- if ((fully_assoc || pure_cam) && SCHP<1)
- {
- cerr << "CAM and fully associative must have at least 1 search port" << endl;
- return false;
- }
-
- if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam)))
- {
- ERP=SCHP;
- }
-
-// if ((!(fully_assoc || pure_cam)) && SCHP>=1)
-// {
-// cerr << "None CAM and fully associative cannot have search ports" << endl;
-// return false;
-// }
-
- if (assoc == 0)
- {
- A = C/B;
- //fully_assoc = true;
- }
- else
- {
- if (assoc == 1)
- {
- A = 1;
- //fully_assoc = false;
- }
- else
- {
- //fully_assoc = false;
- A = assoc;
- if (is_pow2(A) == false)
- {
- cerr << "Associativity must be a power of 2" << endl;
- return false;
- }
- }
- }
-
- if (C/(B*A) <= 1 && assoc!=0)
- {
- cerr << "Number of sets is too small: " << endl;
- cerr << " Need to either increase cache size, or decrease associativity or block size" << endl;
- cerr << " (or use fully associative cache)" << endl;
- return false;
- }
-
- block_sz = B;
-
- /*dt: testing sequential access mode*/
- if(seq_access)
- {
- tag_assoc = A;
- data_assoc = 1;
- is_seq_acc = true;
- }
- else
- {
- tag_assoc = A;
- data_assoc = A;
- is_seq_acc = false;
- }
-
- if (assoc==0)
- {
- data_assoc = 1;
- }
- num_rw_ports = RWP;
- num_rd_ports = ERP;
- num_wr_ports = EWP;
- num_se_rd_ports = NSER;
- if (!(fully_assoc || pure_cam))
- num_search_ports = 0;
- nsets = C/(B*A);
-
- if (temp < 300 || temp > 400 || temp%10 != 0)
- {
- cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl;
- return false;
- }
-
- if (nsets < 1)
- {
- cerr << "Less than one set..." << endl;
- return false;
- }
-
-// power_gating = (array_power_gated
-// || bitline_floating
-// || wl_power_gated
-// || cl_power_gated
-// || interconect_power_gated)?true:false;
-
- if (power_gating)
- {
- array_power_gated = true;
- bitline_floating = true;
- wl_power_gated = true;
- cl_power_gated = true;
- interconect_power_gated = true;
- }
- else
- {
- array_power_gated = false;
- bitline_floating = false;
- wl_power_gated = false;
- cl_power_gated = false;
- interconect_power_gated = false;
- }
-
-// if (power_gating && (!dvs_voltage.empty()))
-// {
-// cerr << "Power gating and DVS cannot be active simultaneously, please model them in two runs.\n\n";
-// return false;
-// }
-
- if (power_gating && (pure_cam||fully_assoc))
- {
- cerr << "Power gating in CAM is not supported yet.\n\n"<< endl;
- return false;
- }
-
- if (power_gating && (is_main_mem
- ||data_arr_ram_cell_tech_type== lp_dram
- ||data_arr_ram_cell_tech_type== comm_dram
- ||tag_arr_ram_cell_tech_type== lp_dram
- ||tag_arr_ram_cell_tech_type== comm_dram
- ||data_arr_peri_global_tech_type== lp_dram
- ||data_arr_peri_global_tech_type== comm_dram
- ||tag_arr_peri_global_tech_type== lp_dram
- || tag_arr_peri_global_tech_type== comm_dram))
- {
- cerr << "Power gating in DRAM is not supported. \n\n"<< endl;
- return false;
- }
-
- if (long_channel_device && (is_main_mem
- ||data_arr_ram_cell_tech_type== lp_dram
- ||data_arr_ram_cell_tech_type== comm_dram
- ||tag_arr_ram_cell_tech_type== lp_dram
- ||tag_arr_ram_cell_tech_type== comm_dram
- ||data_arr_peri_global_tech_type== lp_dram
- ||data_arr_peri_global_tech_type== comm_dram
- ||tag_arr_peri_global_tech_type== lp_dram
- || tag_arr_peri_global_tech_type== comm_dram))
- {
- cerr << "Long Channel Device in DRAM is not supported. \n\n"<< endl;
- return false;
- }
-
- if ((!dvs_voltage.empty()) && (is_main_mem
- ||data_arr_ram_cell_tech_type== lp_dram
- ||data_arr_ram_cell_tech_type== comm_dram
- ||tag_arr_ram_cell_tech_type== lp_dram
- ||tag_arr_ram_cell_tech_type== comm_dram
- ||data_arr_peri_global_tech_type== lp_dram
- ||data_arr_peri_global_tech_type== comm_dram
- ||tag_arr_peri_global_tech_type== lp_dram
- || tag_arr_peri_global_tech_type== comm_dram))
- {
- cerr << "DVS in DRAM is not supported. \n\n"<< endl;
- return false;
- }
-
-// if (power_gating && (specific_hp_vdd
-// || specific_lstp_vdd
-// || specific_lop_vdd))
-// {
-// cerr << "Default Vdd is recommended when enabling power gating.\n\n"<< endl;
-// return false;
-// }
-
- if ((!dvs_voltage.empty())&& ((data_arr_ram_cell_tech_type !=data_arr_peri_global_tech_type)
- ||(tag_arr_peri_global_tech_type !=tag_arr_ram_cell_tech_type)
- ||(data_arr_ram_cell_tech_type !=tag_arr_ram_cell_tech_type)))
- {
- cerr << "Same device types is recommended for tag/data/cell/peripheral for DVS. Same DVS voltage will be applied to different device types\n\n";
- return false;
- }
-
- return true;
-}
-
-
-
-void output_data_csv(const uca_org_t & fin_res)
-{
- //TODO: the csv output should remain
- fstream file("out.csv", ios::in);
- bool print_index = file.fail();
- file.close();
-
- file.open("out.csv", ios::out|ios::app);
- if (file.fail() == true)
- {
- cerr << "File out.csv could not be opened successfully" << endl;
- }
- else
- {
- if (print_index == true)
- {
- file << "Tech node (nm), ";
- file << "Capacity (bytes), ";
- file << "Number of banks, ";
- file << "Associativity, ";
- file << "Output width (bits), ";
- file << "Access time (ns), ";
- file << "Random cycle time (ns), ";
-// file << "Multisubbank interleave cycle time (ns), ";
-
-// file << "Delay request network (ns), ";
-// file << "Delay inside mat (ns), ";
-// file << "Delay reply network (ns), ";
-// file << "Tag array access time (ns), ";
-// file << "Data array access time (ns), ";
-// file << "Refresh period (microsec), ";
-// file << "DRAM array availability (%), ";
- file << "Dynamic search energy (nJ), ";
- file << "Dynamic read energy (nJ), ";
- file << "Dynamic write energy (nJ), ";
-// file << "Tag Dynamic read energy (nJ), ";
-// file << "Data Dynamic read energy (nJ), ";
-// file << "Dynamic read power (mW), ";
- file << "Standby leakage per bank(mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Leakage per bank with leak power management (mW), ";
-// file << "Refresh power as percentage of standby leakage, ";
- file << "Area (mm2), ";
- file << "Ndwl, ";
- file << "Ndbl, ";
- file << "Nspd, ";
- file << "Ndcm, ";
- file << "Ndsam_level_1, ";
- file << "Ndsam_level_2, ";
- file << "Data arrary area efficiency %, ";
- file << "Ntwl, ";
- file << "Ntbl, ";
- file << "Ntspd, ";
- file << "Ntcm, ";
- file << "Ntsam_level_1, ";
- file << "Ntsam_level_2, ";
- file << "Tag arrary area efficiency %, ";
-
-// file << "Resistance per unit micron (ohm-micron), ";
-// file << "Capacitance per unit micron (fF per micron), ";
-// file << "Unit-length wire delay (ps), ";
-// file << "FO4 delay (ps), ";
-// file << "delay route to bank (including crossb delay) (ps), ";
-// file << "Crossbar delay (ps), ";
-// file << "Dyn read energy per access from closed page (nJ), ";
-// file << "Dyn read energy per access from open page (nJ), ";
-// file << "Leak power of an subbank with page closed (mW), ";
-// file << "Leak power of a subbank with page open (mW), ";
-// file << "Leak power of request and reply networks (mW), ";
-// file << "Number of subbanks, ";
-// file << "Page size in bits, ";
-// file << "Activate power, ";
-// file << "Read power, ";
-// file << "Write power, ";
-// file << "Precharge power, ";
-// file << "tRCD, ";
-// file << "CAS latency, ";
-// file << "Precharge delay, ";
-// file << "Perc dyn energy bitlines, ";
-// file << "perc dyn energy wordlines, ";
-// file << "perc dyn energy outside mat, ";
-// file << "Area opt (perc), ";
-// file << "Delay opt (perc), ";
-// file << "Repeater opt (perc), ";
-// file << "Aspect ratio";
- file << endl;
- }
- file << g_ip->F_sz_nm << ", ";
- file << g_ip->cache_sz << ", ";
- file << g_ip->nbanks << ", ";
- file << g_ip->tag_assoc << ", ";
- file << g_ip->out_w << ", ";
- file << fin_res.access_time*1e+9 << ", ";
- file << fin_res.cycle_time*1e+9 << ", ";
-// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
-// file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
-// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", ";
-// file << fin_res.data_array2.delay_reply_network*1e+9 << ", ";
-
-// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
-// {
-// file << fin_res.tag_array2->access_time*1e+9 << ", ";
-// }
-// else
-// {
-// file << 0 << ", ";
-// }
-// file << fin_res.data_array2->access_time*1e+9 << ", ";
-// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
-// file << fin_res.data_array2->dram_array_availability << ", ";
- if (g_ip->fully_assoc || g_ip->pure_cam)
- {
- file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
- }
- else
- {
- file << "N/A" << ", ";
- }
- file << fin_res.power.readOp.dynamic*1e+9 << ", ";
- file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
-// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
-// {
-// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
-// }
-// else
-// {
-// file << "NA" << ", ";
-// }
-// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", ";
-// if (g_ip->fully_assoc || g_ip->pure_cam)
-// {
-// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", ";
-// }
-// else
-// {
-// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", ";
-// }
-
- file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
-// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
-// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
- file << fin_res.area*1e-6 << ", ";
-
- file << fin_res.data_array2->Ndwl << ", ";
- file << fin_res.data_array2->Ndbl << ", ";
- file << fin_res.data_array2->Nspd << ", ";
- file << fin_res.data_array2->deg_bl_muxing << ", ";
- file << fin_res.data_array2->Ndsam_lev_1 << ", ";
- file << fin_res.data_array2->Ndsam_lev_2 << ", ";
- file << fin_res.data_array2->area_efficiency << ", ";
- if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
- {
- file << fin_res.tag_array2->Ndwl << ", ";
- file << fin_res.tag_array2->Ndbl << ", ";
- file << fin_res.tag_array2->Nspd << ", ";
- file << fin_res.tag_array2->deg_bl_muxing << ", ";
- file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
- file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
- file << fin_res.tag_array2->area_efficiency << ", ";
- }
- else
- {
- file << "N/A" << ", ";
- file << "N/A"<< ", ";
- file << "N/A" << ", ";
- file << "N/A" << ", ";
- file << "N/A" << ", ";
- file << "N/A" << ", ";
- file << "N/A" << ", ";
- }
-
-// file << g_tp.wire_inside_mat.R_per_um << ", ";
-// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", ";
-// file << g_tp.unit_len_wire_del / 1e-12 << ", ";
-// file << g_tp.FO4 / 1e-12 << ", ";
-// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", ";
-// file << fin_res.data_array.delay_crossbar / 1e-9 << ", ";
-// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", ";
-// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", ";
-// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", ";
-// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", ";
-// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", ";
-// file << fin_res.data_array.number_subbanks << ", " ;
-// file << fin_res.data_array.page_size_in_bits << ", " ;
-// file << fin_res.data_array.activate_energy * 1e9 << ", " ;
-// file << fin_res.data_array.read_energy * 1e9 << ", " ;
-// file << fin_res.data_array.write_energy * 1e9 << ", " ;
-// file << fin_res.data_array.precharge_energy * 1e9 << ", " ;
-// file << fin_res.data_array.trcd * 1e9 << ", " ;
-// file << fin_res.data_array.cas_latency * 1e9 << ", " ;
-// file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
-// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
- file<data_array2->long_channel_leakage_reduction_memcell
- + 0.2*fr->data_array2->long_channel_leakage_reduction_periperal);//TODO
-double areaoverhead, overhead_data, overhead_tag;
-double wakeup_E, wakeup_T, wakeup_E_data, wakeup_T_data, wakeup_E_tag, wakeup_T_tag;
-int dvs_levels = g_ip->dvs_voltage.size();
-int i;
-bool dvs = !g_ip->dvs_voltage.empty();
- // if (NUCA)
- if (0) {
- cout << "\n\n Detailed Bank Stats:\n";
- cout << " Bank Size (bytes): %d\n" <<
- (int) (g_ip->cache_sz);
- }
- else {
- if (g_ip->data_arr_ram_cell_tech_type == 3) {
- cout << "\n---------- CACTI-P, with new features: "<data_arr_ram_cell_tech_type == 4) {
- cout << "\n---------- CACTI-P, with new features: "<cache_sz) << endl;
- }
-
- cout << " Number of banks: " << (int) g_ip->nbanks << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " Associativity: fully associative\n";
- else {
- if (g_ip->tag_assoc == 1)
- cout << " Associativity: direct mapped\n";
- else
- cout << " Associativity: " <<
- g_ip->tag_assoc << endl;
- }
-
-
- cout << " Block size (bytes): " << g_ip->line_sz << endl;
- cout << " Read/write Ports: " <<
- g_ip->num_rw_ports << endl;
- cout << " Read ports: " <<
- g_ip->num_rd_ports << endl;
- cout << " Write ports: " <<
- g_ip->num_wr_ports << endl;
- if (g_ip->fully_assoc|| g_ip->pure_cam)
- cout << " search ports: " <<
- g_ip->num_search_ports << endl;
- cout << " Technology size (nm): " <<
- g_ip->F_sz_nm << endl << endl;
-
-
- cout << " Access time (ns): " << fr->access_time*1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->access_time*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << " Cycle time (ns): " << fr->cycle_time*1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->cycle_time*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- if (g_ip->data_arr_ram_cell_tech_type >= 4) {
- cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
- cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
- cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
- cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
- cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
- cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
- cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
- cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
- cout << " Refresh power (mW): " <<
- fr->data_array2->refresh_power*1e3 << endl;
- }
- else {
- if ((g_ip->fully_assoc|| g_ip->pure_cam))
- {
- cout << " Total dynamic associative search energy per access (nJ): " <<
- fr->power.searchOp.dynamic*1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->power.searchOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- // cout << " Total dynamic read energy per access (nJ): " <<
- // fr->power.readOp.dynamic*1e9 << endl;
- // cout << " Total dynamic write energy per access (nJ): " <<
- // fr->power.writeOp.dynamic*1e9 << endl;
- }
- // else
- // {
- cout << " Total dynamic read energy per access (nJ): " <<
- fr->power.readOp.dynamic*1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->power.readOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- cout << " Total dynamic write energy per access (nJ): " <<
- fr->power.writeOp.dynamic*1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->power.writeOp.dynamic*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- // }
- if (g_ip->power_gating)
- {
- cout << " Total leakage power of a bank, with power-gating ";
- if (!g_ip->user_defined_vcc_underflow)
- {
- cout << "(state retained)";
- }
- else
- {
- cout << "(non state retained)";
- }
-
- cout <<", including its network outside" //power gated with retaining memory content
- " (mW): " << (g_ip->long_channel_device ? fr->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->power.readOp.power_gated_leakage)*1e3<long_channel_device ? fr->power.readOp.leakage*long_channel_leakage_reduction : fr->power.readOp.leakage)*1e3;
-
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device ? fr->uca_q[i]->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->power.readOp.leakage)*1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- }
-
- if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4)
- {
- }
- cout << " Cache height x width (mm): " <<
- fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl;
- cout << endl;
-
- if (g_ip->power_gating)
- {
- /* Energy/Power stats */
- cout << " Power-gating results (The virtual power supply for gated circuit can only retain the state of idle circuit, not for operating the circuit):" << endl;
- /* Data array power-gating stats */
- if (g_ip->user_defined_vcc_underflow)
- {
- cout<<" Warning: user defined power gating voltage is too low to retain state; Please understand the implications of deep sleep state on non state retaining and cold start effects when waking up the structure."<cache_ht*fr->cache_len/fr->uca_pg_reference->cache_ht/fr->uca_pg_reference->cache_len-1)*100;//%
- cout << " \tPower gating circuits (sleep transistors) induced area overhead: " <<
- areaoverhead << " % " << endl ;
- wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy
- + fr->data_array2->wl_sleep_wakeup_energy
- + fr->data_array2->bl_floating_wakeup_energy;
- wakeup_T = wakeup_T_data=MAX(fr->data_array2->sram_sleep_wakeup_latency,
- MAX(fr->data_array2->wl_sleep_wakeup_latency,fr->data_array2->bl_floating_wakeup_latency));
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy
- + fr->tag_array2->wl_sleep_wakeup_energy
- + fr->tag_array2->bl_floating_wakeup_energy;
- wakeup_T_tag=MAX(fr->tag_array2->sram_sleep_wakeup_latency,
- MAX(fr->tag_array2->wl_sleep_wakeup_latency,fr->tag_array2->bl_floating_wakeup_latency));
-
- wakeup_E += wakeup_E_tag;
- wakeup_T = MAX(wakeup_T_tag, wakeup_T_data);
-
- }
- cout << " \tPower gating Wakeup Latency (ns): " <<
- wakeup_T*1e9 << endl ;
- cout << " \tPower gating Wakeup Energy (nJ): " <<
- wakeup_E*1e9 << endl ;
- }
- cout <data_array2->Ndwl << endl;
- cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
- cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
- cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
- cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
- cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
- cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
- cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
- cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
- cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
- cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
- }
-
- switch (fr->data_array2->wt) {
- case (0):
- cout << " Data array, H-tree wire type: Delay optimized global wires\n";
- break;
- case (1):
- cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
- break;
- case (2):
- cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
- break;
- case (3):
- cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
- break;
- case (4):
- cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
- break;
- case (5):
- cout << " Data array, wire type: Low swing wires\n";
- break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) {
- switch (fr->tag_array2->wt) {
- case (0):
- cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
- break;
- case (1):
- cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
- break;
- case (2):
- cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
- break;
- case (3):
- cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
- break;
- case (4):
- cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
- break;
- case (5):
- cout << " Tag array, wire type: Low swing wires\n";
- break;
- default:
- cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <print_detail)
- {
- //if(g_ip->fully_assoc) return;
-if(0){ //detailed power-gating output
- if (g_ip->power_gating)
- {
- /* Energy/Power stats */
- cout << endl << endl << "Power-gating Components:" << endl << endl;
- /* Data array power-gating stats */
- areaoverhead = fr->cache_ht*fr->cache_len/fr->uca_pg_reference->cache_ht/fr->uca_pg_reference->cache_len-1;
- cout << " Power gating circuits (sleep transistors) induced area overhead: " <<
- areaoverhead << " % " << endl ;
- wakeup_E = wakeup_E_data = fr->data_array2->sram_sleep_wakeup_energy
- + fr->data_array2->wl_sleep_wakeup_energy
- + fr->data_array2->bl_floating_wakeup_energy;
- wakeup_T = wakeup_T_data=MAX(fr->data_array2->sram_sleep_wakeup_latency,
- MAX(fr->data_array2->wl_sleep_wakeup_latency,fr->data_array2->bl_floating_wakeup_latency));
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- wakeup_E_tag = fr->tag_array2->sram_sleep_wakeup_energy
- + fr->tag_array2->wl_sleep_wakeup_energy
- + fr->tag_array2->bl_floating_wakeup_energy;
- wakeup_T_tag=MAX(fr->tag_array2->sram_sleep_wakeup_latency,
- MAX(fr->tag_array2->wl_sleep_wakeup_latency,fr->tag_array2->bl_floating_wakeup_latency));
-
- wakeup_E += wakeup_E_tag;
- wakeup_T = MAX(wakeup_T_tag, wakeup_T_data);
-
- }
- cout << " Power gating Wakeup Latency (ns): " <<
- wakeup_T*1e9 << endl ;
- cout << " Power gating Wakeup Energy (nJ): " <<
- wakeup_E*1e9 << endl ;
-
-
-//extra power gating details
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- cout << " Data array: " << endl;
- else if (g_ip->pure_cam)
- cout << " CAM array: " << endl;
- else
- cout << " Fully associative cache array: " << endl;
-
- cout << "\t Sub-array Sleep Tx size (um) - " <<
- fr->data_array2->sram_sleep_tx_width << endl;
-
- // cout << "\t Sub-array Sleep Tx total size (um) - " <<
- // fr->data_array2->sram_sleep_tx_width << endl;
-
- cout << "\t Sub-array Sleep Tx total area (mm^2) - " <<
- fr->data_array2->sram_sleep_tx_area*1e-6 << endl;
-
- cout << "\t Sub-array wakeup time (ns) - " <<
- fr->data_array2->sram_sleep_wakeup_latency*1e9 << endl;
-
- cout << "\t Sub-array Tx energy (nJ) - " <<
- fr->data_array2->sram_sleep_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- cout << endl;
- cout << "\t WL Sleep Tx size (um) - " <<
- fr->data_array2->wl_sleep_tx_width << endl;
-
- // cout << "\t WL Sleep total Tx size (um) - " <<
- // fr->data_array2->wl_sleep_tx_width << endl;
-
- cout << "\t WL Sleep Tx total area (mm^2) - " <<
- fr->data_array2->wl_sleep_tx_area*1e-6 << endl;
-
- cout << "\t WL wakeup time (ns) - " <<
- fr->data_array2->wl_sleep_wakeup_latency*1e9 << endl;
-
- cout << "\t WL Tx energy (nJ) - " <<
- fr->data_array2->wl_sleep_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- cout << endl;
- cout << "\t BL floating wakeup time (ns) - " <<
- fr->data_array2->bl_floating_wakeup_latency*1e9 << endl;
-
- cout << "\t BL floating Tx energy (nJ) - " <<
- fr->data_array2->bl_floating_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
- cout << endl;
-
- cout << "\t Active mats per access - " << fr->data_array2->num_active_mats<data_array2->num_submarray_mats<pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << " Tag array: " << endl;
- cout << "\t Sub-array Sleep Tx size (um) - " <<
- fr->tag_array2->sram_sleep_tx_width << endl;
-
- // cout << "\t Sub-array Sleep Tx total size (um) - " <<
- // fr->tag_array2->sram_sleep_tx_width << endl;
-
- cout << "\t Sub-array Sleep Tx total area (mm^2) - " <<
- fr->tag_array2->sram_sleep_tx_area*1e-6 << endl;
-
- cout << "\t Sub-array wakeup time (ns) - " <<
- fr->tag_array2->sram_sleep_wakeup_latency*1e9 << endl;
-
- cout << "\t Sub-array Tx energy (nJ) - " <<
- fr->tag_array2->sram_sleep_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- cout << endl;
- cout << "\t WL Sleep Tx size (um) - " <<
- fr->tag_array2->wl_sleep_tx_width << endl;
-
- // cout << "\t WL Sleep total Tx size (um) - " <<
- // fr->tag_array2->wl_sleep_tx_width << endl;
-
- cout << "\t WL Sleep Tx total area (mm^2) - " <<
- fr->tag_array2->wl_sleep_tx_area*1e-6 << endl;
-
- cout << "\t WL wakeup time (ns) - " <<
- fr->tag_array2->wl_sleep_wakeup_latency*1e9 << endl;
-
- cout << "\t WL Tx energy (nJ) - " <<
- fr->tag_array2->wl_sleep_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- cout << endl;
- cout << "\t BL floating wakeup time (ns) - " <<
- fr->tag_array2->bl_floating_wakeup_latency*1e9 << endl;
-
- cout << "\t BL floating Tx energy (nJ) - " <<
- fr->tag_array2->bl_floating_wakeup_energy*1e9 << endl;
- //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- cout << endl;
-
- cout << "\t Active mats per access - " << fr->tag_array2->num_active_mats<tag_array2->num_submarray_mats<data_array2->access_time/1e-9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->access_time/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tH-tree delay outside banks (ns): " <<
- fr->data_array2->delay_route_to_bank * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_route_to_bank * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tH-tree input delay (inside a bank) (ns): " <<
- fr->data_array2->delay_input_htree * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_input_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->data_array2->delay_row_decoder * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->uca_q[i]->data_array2->delay_row_decoder * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- }
- else
- {
- cout << "\tCAM search delay (ns): " <<
- fr->data_array2->delay_matchlines * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_matchlines * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- }
-
- cout << "\tBitline delay (ns): " <<
- fr->data_array2->delay_bitlines/1e-9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_bitlines/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense Amplifier delay (ns): " <<
- fr->data_array2->delay_sense_amp * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_sense_amp*1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << "\tH-tree output delay (inside a bank) (ns): " <<
- fr->data_array2->delay_subarray_output_driver * 1e9 +
- fr->data_array2->delay_dout_htree * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->delay_subarray_output_driver * 1e9 +
- fr->uca_q[i]->data_array2->delay_dout_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- if (g_ip->power_gating)
- {
- cout << "\tPower gating wakeup time (ns) - " <<
- wakeup_T_data*1e9 << endl;
- }
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- /* tag array stats */
- cout << endl << " Tag side (with Output driver) (ns): " <<
- fr->tag_array2->access_time/1e-9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->access_time/1e-9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
- cout << "\tH-tree delay outside banks (ns): " <<
- fr->tag_array2->delay_route_to_bank * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_route_to_bank * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tH-tree input delay (inside a bank) (ns): " <<
- fr->tag_array2->delay_input_htree * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_input_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tDecoder + wordline delay (ns): " <<
- fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->tag_array2->delay_row_decoder * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
- fr->uca_q[i]->tag_array2->delay_row_decoder * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tBitline delay (ns): " <<
- fr->tag_array2->delay_bitlines/1e-9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_bitlines * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tSense Amplifier delay (ns): " <<
- fr->tag_array2->delay_sense_amp * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_sense_amp * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tComparator delay (ns): " <<
- fr->tag_array2->delay_comparator * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_comparator * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
-
- cout << "\tH-tree output delay (inside a bank) (ns): " <<
- fr->tag_array2->delay_subarray_output_driver * 1e9 +
- fr->tag_array2->delay_dout_htree * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->delay_subarray_output_driver * 1e9 +
- fr->uca_q[i]->tag_array2->delay_dout_htree * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout << endl;
- if (g_ip->power_gating)
- {
- cout << "\tPower gating wakeup time (ns) - " <<
- wakeup_T_tag*1e9 << endl;
- }
- }
-
-
-
- /* Energy/Power stats */
- cout << endl << endl << "Power Components:" << endl << endl;
-
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- {
- cout << " Data array: Total dynamic read energy/access (nJ): " <<
- fr->data_array2->power.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- if (g_ip->power_gating)
- {
- cout << "\tTotal leakage power of a bank, power gated ";
-
- if (!g_ip->user_defined_vcc_underflow)
- {
- cout << "with ";
- }
- else
- {
- cout << "without ";
- }
- cout<<"retaining memory content, including its network outside (mW): " <<
- (g_ip->long_channel_device ? fr->data_array2->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.power_gated_leakage)*1e3 << endl;
- }
-// else
-// {
- cout << "\tTotal leakage power of a bank without power gating, including its network outside (mW): " <<
- (g_ip->long_channel_device ? fr->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.leakage)*1e3;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device ?fr->uca_q[i]->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->data_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-// }
-
- cout << "\tTotal energy in H-tree outside banks (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tInput H-tree inside bank Energy (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic) * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_addr_input_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_bitlines.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- if (g_ip->power_gating)
- {
- cout << "\tTotal leakage power in H-tree outside a bank when power gated (that includes both "
- "address and data network) ((mW)): " <<
- (g_ip->long_channel_device?fr->data_array2->power_routing_to_bank.readOp.power_gated_leakage * long_channel_leakage_reduction: fr->data_array2->power_routing_to_bank.readOp.power_gated_leakage) * 1e3 << endl;
- }
-// else
-// {
- cout << "\tTotal leakage power in H-tree outside a bank (that includes both "
- "address and data network) ((mW)): " <<
- (g_ip->long_channel_device? fr->data_array2->power_routing_to_bank.readOp.leakage * long_channel_leakage_reduction: fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device? fr->uca_q[i]->data_array2->power_routing_to_bank.readOp.leakage* long_channel_leakage_reduction: fr->uca_q[i]->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-// }
- // cout << "\tTotal leakage power in H-tree (that includes both "
- // "address and data network) ((mW)): " <<
- // (fr->data_array2->power_addr_input_htree.readOp.leakage +
- // fr->data_array2->power_data_output_htree.readOp.leakage +
- // fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
-
- // cout << "\tTotal leakage power in cells (mW): " <<
- // (fr->data_array2->array_leakage) * 1e3 << endl;
- // cout << "\tTotal leakage power in row logic(mW): " <<
- // (fr->data_array2->wl_leakage) * 1e3 << endl;
- // cout << "\tTotal leakage power in column logic(mW): " <<
- // (fr->data_array2->cl_leakage) * 1e3 << endl;
- //
- // cout << "\tTotal gate leakage power in H-tree (that includes both "
- // "address and data network) ((mW)): " <<
- // (fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
- // fr->data_array2->power_data_output_htree.readOp.gate_leakage +
- // fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
- }
-
- if (g_ip->pure_cam||g_ip->fully_assoc)
- {
-
- if (g_ip->pure_cam) cout << " CAM array:"<data_array2->power.searchOp.dynamic * 1e9 << endl;
- // cout << "\tTotal energy in H-tree (that includes both "
- // "match key and data transfer) (nJ): " <<
- // (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- // fr->data_array2->power_htree_out_search.searchOp.dynamic +
- // fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
- // cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
- // (fr->data_array2->power_htree_in_search.searchOp.dynamic +
- // fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
- // cout << "\tSearchlines (nJ): " <<
- // fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- // fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
- // cout << "\tMatchlines (nJ): " <<
- // fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- // fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
- // cout << "\tSub-array output driver (nJ): " <<
- // fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
- //
- //
- // cout <data_array2->power.readOp.dynamic * 1e9 << endl;
- // cout << "\tTotal energy in H-tree (that includes both "
- // "address and data transfer) (nJ): " <<
- // (fr->data_array2->power_addr_input_htree.readOp.dynamic +
- // fr->data_array2->power_data_output_htree.readOp.dynamic +
- // fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
- // cout << "\tOutput Htree inside bank Energy (nJ): " <<
- // fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
- // cout << "\tDecoder (nJ): " <<
- // fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- // fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
- // cout << "\tWordline (nJ): " <<
- // fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
- // cout << "\tBitline mux & associated drivers (nJ): " <<
- // fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- // fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- // fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
- // cout << "\tSense amp mux & associated drivers (nJ): " <<
- // fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- // fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- // fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- // fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- // fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- // fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
- // cout << "\tBitlines (nJ): " <<
- // fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- // fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl;
- // cout << "\tSense amplifier energy (nJ): " <<
- // fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
- // cout << "\tSub-array output driver (nJ): " <<
- // fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
- //
- // cout << endl <<" Total leakage power of a bank (mW): " <<
- // fr->data_array2->power.readOp.leakage * 1e3 << endl;
- // }
- // else
- // {
- if (g_ip->fully_assoc) cout << " Fully associative array:"<data_array2->power.searchOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tTotal energy in H-tree outside banks(that includes both "
- "match key and data transfer) (nJ): " <<
- (fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tMatch Key input Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_in_search.searchOp.dynamic ) * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_htree_in_search.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tResult output Htrees inside bank Energy (nJ): " <<
- (fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_htree_out_search.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSearchlines (nJ): " <<
- fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_searchline.searchOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tMatchlines (nJ): " <<
- fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- if (g_ip->fully_assoc)
- {
- cout << "\tData portion wordline (nJ): " <<
- fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tData Bitlines (nJ): " <<
- fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_sense_amps.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- }
-
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout <data_array2->power.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << "\tTotal energy in H-tree outside banks(that includes both "
- "address and data transfer) (nJ): " <<
- (fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tInput Htree inside bank Energy (nJ): " <<
- (fr->data_array2->power_addr_input_htree.readOp.dynamic ) * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_addr_input_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tOutput Htree inside bank Energy (nJ): " <<
- fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tDecoder (nJ): " <<
- fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tWordline (nJ): " <<
- fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << "\tBitlines (nJ): " <<
- fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_bitlines.readOp.dynamic * 1e9 +
- fr->uca_q[i]->data_array2->power_prechg_eq_drivers.readOp.dynamic* 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amplifier energy (nJ): " <<
- fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSub-array output driver (nJ): " <<
- fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << endl <<" Total leakage power of a bank, including its network outside (mW): " <<
- (g_ip->long_channel_device ? fr->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->data_array2->power.readOp.leakage)*1e3; //CAM/FA does not support PG yet
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device ?fr->uca_q[i]->data_array2->power.readOp.leakage*long_channel_leakage_reduction : fr->uca_q[i]->data_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- }
-
-
- if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
- fr->tag_array2->power.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- if (g_ip->power_gating)
- {
- cout << "\tTotal leakage power of a bank, power gated ";
- if (!g_ip->user_defined_vcc_underflow)
- {
- cout << "with ";
- }
- else
- {
- cout << "without ";
- }
- cout<<"retaining memory content, including its network outside (mW): " <<
- (g_ip->long_channel_device ? fr->tag_array2->power.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->tag_array2->power.readOp.power_gated_leakage)* 1e3 << endl;
- }
-// else
-// {
- cout << "\tTotal leakage power of a bank without power gating, including its network outside (mW): " <<
- (g_ip->long_channel_device ? fr->tag_array2->power.readOp.leakage * long_channel_leakage_reduction: fr->tag_array2->power.readOp.leakage)* 1e3;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device ? fr->uca_q[i]->tag_array2->power.readOp.leakage *long_channel_leakage_reduction: fr->uca_q[i]->tag_array2->power.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-// }
-// cout << "\tTotal leakage read/write power of a bank (mW): " <<
-// fr->tag_array2->power.readOp.leakage * 1e3 << endl;
- cout << "\tTotal energy in H-tree outside banks (that includes both "
- "address and data transfer) (nJ): " <<
- (fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_routing_to_bank.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
-
- cout << "\tInput H-tree inside banks Energy (nJ): " <<
- (fr->tag_array2->power_addr_input_htree.readOp.dynamic) * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_addr_input_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tOutput Htree inside a bank Energy (nJ): " <<
- fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tDecoder (nJ): " <<
- fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9) <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tWordline (nJ): " <<
- fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_row_decoders.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitline mux & associated drivers (nJ): " <<
- fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amp mux & associated drivers (nJ): " <<
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
- fr->uca_q[i]->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9)
- <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tBitlines precharge and equalization circuit (nJ): " <<
- fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
- cout << "\tBitlines (nJ): " <<
- fr->tag_array2->power_bitlines.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_bitlines.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSense amplifier energy (nJ): " <<
- fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_sense_amps.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- cout << "\tSub-array output driver (nJ): " <<
- fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 ;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; iuca_q[i]->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-
- if (g_ip->power_gating)
- {
- cout << "\tTotal leakage power in H-tree outside a bank when power gated (that includes both "
- "address and data network) ((mW)): " <<
- (g_ip->long_channel_device ? fr->tag_array2->power_routing_to_bank.readOp.power_gated_leakage*long_channel_leakage_reduction : fr->tag_array2->power_routing_to_bank.readOp.power_gated_leakage) * 1e3 << endl;
- }
-// else
-// {
- cout << "\tTotal leakage power in H-tree outside a bank (that includes both "
- "address and data network) without power gating((mW)): " <<
- (g_ip->long_channel_device ? fr->tag_array2->power_routing_to_bank.readOp.leakage*long_channel_leakage_reduction : fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3;
- if (dvs)
- {
- cout<<" (@DVS_Level0); ";
- for (i = 0; ilong_channel_device ? fr->uca_q[i]->tag_array2->power_routing_to_bank.readOp.leakage *long_channel_leakage_reduction : fr->uca_q[i]->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 <<" (@DVS_Level"<< i+1<<"_Vdd=" << g_ip->dvs_voltage[i]<<"); ";
- }
- cout<< endl;
-// }
-
-// cout << "\tTotal leakage power of a bank (mW): " <<
-// fr->tag_array2->power.readOp.leakage * 1e3 << endl;
-// cout << "\tTotal leakage power in H-tree (that includes both "
-// "address and data network) ((mW)): " <<
-// (fr->tag_array2->power_addr_input_htree.readOp.leakage +
-// fr->tag_array2->power_data_output_htree.readOp.leakage +
-// fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl;
-//
-// cout << "\tTotal leakage power in cells (mW): " <<
-// (fr->tag_array2->array_leakage) * 1e3 << endl;
-// cout << "\tTotal leakage power in row logic(mW): " <<
-// (fr->tag_array2->wl_leakage) * 1e3 << endl;
-// cout << "\tTotal leakage power in column logic(mW): " <<
-// (fr->tag_array2->cl_leakage) * 1e3 << endl;
-// cout << "\tTotal gate leakage power in H-tree (that includes both "
-// "address and data network) ((mW)): " <<
-// (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
-// fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
-// fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl;
- }
-
- cout << endl << endl << "Area Components:" << endl << endl;
- /* Data array area stats */
- if (!(g_ip->pure_cam || g_ip->fully_assoc))
- cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else if (g_ip->pure_cam)
- cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
- else
- cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
-
- cout << "\tHeight (mm): " <<
- fr->data_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->data_array2->all_banks_width*1e-3 << endl;
- if (g_ip->print_detail) {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->data_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->data_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->data_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->data_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->data_array2->subarray_length*1e-3 << endl;
- if (g_ip->power_gating)
- {
- overhead_data = (fr->data_array2->area/fr->uca_pg_reference->data_array2->area-1)*100;//%;
- cout << " Power gating circuits (sleep transistors) induced area overhead: " << overhead_data <<"%" <pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem)
- {
- cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
- cout << "\tHeight (mm): " <<
- fr->tag_array2->all_banks_height*1e-3 << endl;
- cout << "\tWidth (mm): " <<
- fr->tag_array2->all_banks_width*1e-3 << endl;
-
- if (g_ip->print_detail)
- {
- cout << "\tArea efficiency (Memory cell area/Total area) - " <<
- fr->tag_array2->area_efficiency << " %" << endl;
- cout << "\t\tMAT Height (mm): " <<
- fr->tag_array2->mat_height*1e-3 << endl;
- cout << "\t\tMAT Length (mm): " <<
- fr->tag_array2->mat_length*1e-3 << endl;
- cout << "\t\tSubarray Height (mm): " <<
- fr->tag_array2->subarray_height*1e-3 << endl;
- cout << "\t\tSubarray Length (mm): " <<
- fr->tag_array2->subarray_length*1e-3 << endl;
- }
-
- if (g_ip->power_gating)
- {
- overhead_tag = (fr->tag_array2->area/fr->uca_pg_reference->tag_array2->area-1)*100;//%;
- cout << " Power gating circuits (sleep transistors) induced area overhead: " << overhead_tag <<"%" <add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
-
-
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-// g_ip->is_cache=true;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
-
- if (!g_ip->error_checking()) exit(0);
-
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
-
- solve(&fin_res);
-
- if (!g_ip->dvs_voltage.empty())
- {
- update_dvs(&fin_res);
- }
- if (g_ip->power_gating)
- {
- update_pg(&fin_res);//this is needed for compute area overhead of power-gating, even the gated power is calculated together un-gated leakage
- }
-
-// g_ip->display_ip();
-// output_UCA(&fin_res);
-// output_data_csv(fin_res);
-// Wire wprint;//reset wires to original configuration as in *.cfg file (dvs level 0)
-// Wire::print_wire();
- // delete (g_ip);
-
- return fin_res;
-}
-
-//McPAT's plain interface, please keep !!!
-uca_org_t init_interface(InputParameter* const local_interface)
-{
- // g_ip = new InputParameter();
- //g_ip->add_ecc_b_ = true;
-
- uca_org_t fin_res;
- fin_res.valid = false;
-
- g_ip = local_interface;
-
-
-// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
-// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
-// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
-// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
-//
-// g_ip->ic_proj_type = interconnect_projection_type_in;
-// g_ip->wire_is_mat_type = wire_inside_mat_type_in;
-// g_ip->wire_os_mat_type = wire_outside_mat_type_in;
-// g_ip->burst_len = BURST_LENGTH_in;
-// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
-// g_ip->page_sz_bits = PAGE_SIZE_BITS_in;
-//
-// g_ip->cache_sz = cache_size;
-// g_ip->line_sz = line_size;
-// g_ip->assoc = associativity;
-// g_ip->nbanks = banks;
-// g_ip->out_w = output_width;
-// g_ip->specific_tag = specific_tag;
-// if (tag_width == 0) {
-// g_ip->tag_w = 42;
-// }
-// else {
-// g_ip->tag_w = tag_width;
-// }
-//
-// g_ip->access_mode = access_mode;
-// g_ip->delay_wt = obj_func_delay;
-// g_ip->dynamic_power_wt = obj_func_dynamic_power;
-// g_ip->leakage_power_wt = obj_func_leakage_power;
-// g_ip->area_wt = obj_func_area;
-// g_ip->cycle_time_wt = obj_func_cycle_time;
-// g_ip->delay_dev = dev_func_delay;
-// g_ip->dynamic_power_dev = dev_func_dynamic_power;
-// g_ip->leakage_power_dev = dev_func_leakage_power;
-// g_ip->area_dev = dev_func_area;
-// g_ip->cycle_time_dev = dev_func_cycle_time;
-// g_ip->temp = temp;
-//
-// g_ip->F_sz_nm = tech_node;
-// g_ip->F_sz_um = tech_node / 1000;
-// g_ip->is_main_mem = (main_mem != 0) ? true : false;
-// g_ip->is_cache = (cache ==1) ? true : false;
-// g_ip->pure_ram = (cache ==0) ? true : false;
-// g_ip->pure_cam = (cache ==2) ? true : false;
-// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false;
-// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
-// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;
-//
-// g_ip->num_rw_ports = rw_ports;
-// g_ip->num_rd_ports = excl_read_ports;
-// g_ip->num_wr_ports = excl_write_ports;
-// g_ip->num_se_rd_ports = single_ended_read_ports;
-// g_ip->num_search_ports = search_ports;
-//
-// g_ip->print_detail = 1;
-// g_ip->nuca = 0;
-//
-// if (force_wiretype == 0)
-// {
-// g_ip->wt = Global;
-// g_ip->force_wiretype = false;
-// }
-// else
-// { g_ip->force_wiretype = true;
-// if (wiretype==10) {
-// g_ip->wt = Global_10;
-// }
-// if (wiretype==20) {
-// g_ip->wt = Global_20;
-// }
-// if (wiretype==30) {
-// g_ip->wt = Global_30;
-// }
-// if (wiretype==5) {
-// g_ip->wt = Global_5;
-// }
-// if (wiretype==0) {
-// g_ip->wt = Low_swing;
-// }
-// }
-// //g_ip->wt = Global_5;
-// if (force_config == 0)
-// {
-// g_ip->force_cache_config = false;
-// }
-// else
-// {
-// g_ip->force_cache_config = true;
-// g_ip->ndbl=ndbl;
-// g_ip->ndwl=ndwl;
-// g_ip->nspd=nspd;
-// g_ip->ndcm=ndcm;
-// g_ip->ndsam1=ndsam1;
-// g_ip->ndsam2=ndsam2;
-//
-//
-// }
-//
-// if (ecc==0){
-// g_ip->add_ecc_b_=false;
-// }
-// else
-// {
-// g_ip->add_ecc_b_=true;
-// }
-
-
- g_ip->error_checking();
-
- init_tech_params(g_ip->F_sz_um, false);
- Wire winit; // Do not delete this line. It initializes wires.
- //solve(&fin_res);
- //g_ip->display_ip();
-
- //solve(&fin_res);
- //output_UCA(&fin_res);
- //output_data_csv(fin_res);
- // delete (g_ip);
-
- return fin_res;
-}
-
-void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
-{
- // Copy the InputParameter to global interface (g_ip) and do error checking.
- g_ip = local_interface;
- g_ip->error_checking();
-
- // Initialize technology parameters
- init_tech_params(g_ip->F_sz_um,false);
-
- Wire winit; // Do not delete this line. It initializes wires.
-
- // This corresponds to solve() in the initialization process.
- update_dvs(fin_res);
-}
diff --git a/cacti/main.cc b/cacti/main.cc
deleted file mode 100644
index 2e40ef3..0000000
--- a/cacti/main.cc
+++ /dev/null
@@ -1,200 +0,0 @@
-/*------------------------------------------------------------
- * CACTI 6.5
- * Copyright 2008 Hewlett-Packard Development Corporation
- * All Rights Reserved
- *
- * Permission to use, copy, and modify this software and its documentation is
- * hereby granted only under the following terms and conditions. Both the
- * above copyright notice and this permission notice must appear in all copies
- * of the software, derivative works or modified versions, and any portions
- * thereof, and both notices must appear in supporting documentation.
- *
- * Users of this software agree to the terms and conditions set forth herein, and
- * hereby grant back to Hewlett-Packard Company and its affiliated companies ("HP")
- * a non-exclusive, unrestricted, royalty-free right and license under any changes,
- * enhancements or extensions made to the core functions of the software, including
- * but not limited to those affording compatibility with other hardware or software
- * environments, but excluding applications which incorporate this software.
- * Users further agree to use their best efforts to return to HP any such changes,
- * enhancements or extensions that they make and inform HP of noteworthy uses of
- * this software. Correspondence should be provided to HP at:
- *
- * Director of Intellectual Property Licensing
- * Office of Strategy and Technology
- * Hewlett-Packard Company
- * 1501 Page Mill Road
- * Palo Alto, California 94304
- *
- * This software may be distributed (but not offered for sale or transferred
- * for compensation) to third parties, provided such third parties agree to
- * abide by the terms and conditions of this notice.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND HP DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL HP
- * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *------------------------------------------------------------*/
-
-#include "io.h"
-#include
-
-using namespace std;
-
-
-int main(int argc,char *argv[])
-{
-
- uca_org_t result;
- if (argc != 53 && argc != 55)
- {
- bool infile_specified = false;
- string infile_name("");
-
- for (int32_t i = 0; i < argc; i++)
- {
- if (argv[i] == string("-infile"))
- {
- infile_specified = true;
- i++;
- infile_name = argv[i];
- }
- }
-
- if (infile_specified == false)
- {
- cerr << " Invalid arguments -- how to use CACTI:" << endl;
- cerr << " 1) cacti -infile " << endl;
- cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" << endl;
- cerr << " No. of arguments input - " << argc << endl;
- exit(1);
- }
- else
- {
- result = cacti_interface(infile_name);
- }
- }
- else if (argc == 53)
- {
- result = cacti_interface(atoi(argv[ 1]),
- atoi(argv[ 2]),
- atoi(argv[ 3]),
- atoi(argv[ 4]),
- atoi(argv[ 5]),
- atoi(argv[ 6]),
- atoi(argv[ 7]),
- atoi(argv[ 8]),
- atoi(argv[ 9]),
- atof(argv[10]),
- atoi(argv[11]),
- atoi(argv[12]),
- atoi(argv[13]),
- atoi(argv[14]),
- atoi(argv[15]),
- atoi(argv[16]),
- atoi(argv[17]),
- atoi(argv[18]),
- atoi(argv[19]),
- atoi(argv[20]),
- atoi(argv[21]),
- atoi(argv[22]),
- atoi(argv[23]),
- atoi(argv[24]),
- atoi(argv[25]),
- atoi(argv[26]),
- atoi(argv[27]),
- atoi(argv[28]),
- atoi(argv[29]),
- atoi(argv[30]),
- atoi(argv[31]),
- atoi(argv[32]),
- atoi(argv[33]),
- atoi(argv[34]),
- atoi(argv[35]),
- atoi(argv[36]),
- atoi(argv[37]),
- atoi(argv[38]),
- atoi(argv[39]),
- atoi(argv[40]),
- atoi(argv[41]),
- atoi(argv[42]),
- atoi(argv[43]),
- atoi(argv[44]),
- atoi(argv[45]),
- atoi(argv[46]),
- atoi(argv[47]),
- atoi(argv[48]),
- atoi(argv[49]),
- atoi(argv[50]),
- atoi(argv[51]),
- atoi(argv[52]));
- }
- else
- {
- result = cacti_interface(atoi(argv[ 1]),
- atoi(argv[ 2]),
- atoi(argv[ 3]),
- atoi(argv[ 4]),
- atoi(argv[ 5]),
- atoi(argv[ 6]),
- atoi(argv[ 7]),
- atoi(argv[ 8]),
- atof(argv[ 9]),
- atoi(argv[10]),
- atoi(argv[11]),
- atoi(argv[12]),
- atoi(argv[13]),
- atoi(argv[14]),
- atoi(argv[15]),
- atoi(argv[16]),
- atoi(argv[17]),
- atoi(argv[18]),
- atoi(argv[19]),
- atoi(argv[20]),
- atoi(argv[21]),
- atoi(argv[22]),
- atoi(argv[23]),
- atoi(argv[24]),
- atoi(argv[25]),
- atoi(argv[26]),
- atoi(argv[27]),
- atoi(argv[28]),
- atoi(argv[29]),
- atoi(argv[30]),
- atoi(argv[31]),
- atoi(argv[32]),
- atoi(argv[33]),
- atoi(argv[34]),
- atoi(argv[35]),
- atoi(argv[36]),
- atoi(argv[37]),
- atoi(argv[38]),
- atoi(argv[39]),
- atoi(argv[40]),
- atoi(argv[41]),
- atoi(argv[42]),
- atoi(argv[43]),
- atoi(argv[44]),
- atoi(argv[45]),
- atoi(argv[46]),
- atoi(argv[47]),
- atoi(argv[48]),
- atoi(argv[49]),
- atoi(argv[50]),
- atoi(argv[51]),
- atoi(argv[52]),
- atoi(argv[53]),
- atoi(argv[54]));
- }
-
- result.cleanup();
-// delete result.data_array2;
-// if (result.tag_array2!=NULL)
-// delete result.tag_array2;
-
- return 0;
-}
-
diff --git a/cacti/makefile b/cacti/makefile
deleted file mode 100644
index 2728691..0000000
--- a/cacti/makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-TAR = cacti
-
-.PHONY: dbg opt depend clean clean_dbg clean_opt
-
-all: opt
-
-dbg: $(TAR).mk obj_dbg
- @$(MAKE) TAG=dbg -C . -f $(TAR).mk
-
-opt: $(TAR).mk obj_opt
- @$(MAKE) TAG=opt -C . -f $(TAR).mk
-
-obj_dbg:
- mkdir $@
-
-obj_opt:
- mkdir $@
-
-clean: clean_dbg clean_opt
-
-clean_dbg: obj_dbg
- @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean
- rm -rf $<
-
-clean_opt: obj_opt
- @$(MAKE) TAG=opt -C . -f $(TAR).mk clean
- rm -rf $<
-
diff --git a/cacti/mat.cc b/cacti/mat.cc
deleted file mode 100755
index 221369d..0000000
--- a/cacti/mat.cc
+++ /dev/null
@@ -1,1954 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include "mat.h"
-#include
-
-
-Mat::Mat(const DynamicParameter & dyn_p)
- :dp(dyn_p),
- power_subarray_out_drv(),
- delay_fa_tag(0), delay_cam(0),
- delay_before_decoder(0), delay_bitline(0),
- delay_wl_reset(0), delay_bl_restore(0),
- delay_searchline(0), delay_matchchline(0),
- delay_cam_sl_restore(0), delay_cam_ml_reset(0),
- delay_fa_ram_wl(0),delay_hit_miss_reset(0),
- delay_hit_miss(0),
- subarray(dp, dp.fully_assoc),
- power_bitline(), per_bitline_read_energy(0),
- deg_bl_muxing(dp.deg_bl_muxing),
- num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
- delay_writeback(0),
- cell(subarray.cell), cam_cell(subarray.cam_cell),
- is_dram(dyn_p.is_dram),
- pure_cam(dyn_p.pure_cam),
- num_mats(dp.num_mats),
- power_sa(), delay_sa(0),
- leak_power_sense_amps_closed_page_state(0),
- leak_power_sense_amps_open_page_state(0),
- delay_subarray_out_drv(0),
- delay_comparator(0), power_comparator(),
- num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
- num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
- num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir),
- array_leakage(0),
- wl_leakage(0),
- cl_leakage(0),
- sram_sleep_tx(0)
- {
- assert(num_subarrays_per_mat <= 4);
- assert(num_subarrays_per_row <= 2);
- is_fa = (dp.fully_assoc) ? true : false;
- camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
-
- if (is_fa || pure_cam)
- num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
-
- if (dp.use_inp_params == 1) {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
-
- }
-
- double number_sa_subarray;
-
- if (!is_fa && !pure_cam)
- {
- number_sa_subarray = subarray.num_cols / deg_bl_muxing;
- }
- else if (is_fa && !pure_cam)
- {
- number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
- }
-
- else
- {
- number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
- }
-
- int num_dec_signals = subarray.num_rows;
- double C_ld_bit_mux_dec_out = 0;
- double C_ld_sa_mux_lev_1_dec_out = 0;
- double C_ld_sa_mux_lev_2_dec_out = 0;
- double R_wire_wl_drv_out;
-
- if (!is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
- }
- else if (is_fa && !pure_cam)
- {
- R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
- }
- else
- {
- R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
- }
-
- double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
- double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
-
- if (deg_bl_muxing > 1)
- {
- C_ld_bit_mux_dec_out =
- (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (dp.Ndsam_lev_1 > 1)
- {
- C_ld_sa_mux_lev_1_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- C_ld_sa_mux_lev_2_dec_out =
- (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
- num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
- }
-
- if (num_subarrays_per_row >= 2)
- {
- // wire heads for both right and left side of a mat, so half the resistance
- R_wire_bit_mux_dec_out /= 2.0;
- R_wire_sa_mux_dec_out /= 2.0;
- }
-
-
- row_dec = new Decoder(
- num_dec_signals,
- false,
- subarray.C_wl,
- R_wire_wl_drv_out,
- false/*is_fa*/,
- is_dram,
- true,
- camFlag? cam_cell:cell,
- g_ip->power_gating? true:false,
- subarray.num_rows);
-
-// row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for OOO programming
-// if (is_fa && (!dp.is_tag))
-// {
-// row_dec->exist = true;
-// }
- bit_mux_dec = new Decoder(
- deg_bl_muxing,// This number is 1 for FA or CAM
- false,
- C_ld_bit_mux_dec_out,
- R_wire_bit_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell,
- g_ip->power_gating? true:false);
- sa_mux_lev_1_dec = new Decoder(
- dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
- dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
- C_ld_sa_mux_lev_1_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell,
- g_ip->power_gating? true:false);
- sa_mux_lev_2_dec = new Decoder(
- dp.Ndsam_lev_2, // This number is 1 for FA or CAM
- false,
- C_ld_sa_mux_lev_2_dec_out,
- R_wire_sa_mux_dec_out,
- false/*is_fa*/,
- is_dram,
- false,
- camFlag? cam_cell:cell,
- g_ip->power_gating? true:false);
-
- double C_wire_predec_blk_out;
- double R_wire_predec_blk_out;
-
- if (!is_fa && !pure_cam)
- {
-
- C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
- R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
-
- }
- else //for pre-decode block's load is same for both FA and CAM
- {
- C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
- R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
- }
-
-
- if (is_fa||pure_cam)
- num_dec_signals += _log2(num_subarrays_per_mat);
-
- PredecBlk * r_predec_blk1 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- true);
- PredecBlk * r_predec_blk2 = new PredecBlk(
- num_dec_signals,
- row_dec,
- C_wire_predec_blk_out,
- R_wire_predec_blk_out,
- num_subarrays_per_mat,
- is_dram,
- false);
- PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
- PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
- PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
- PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
- dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
- dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
-
- PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
- PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
- PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
- PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
- way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
- dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
-
- r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
- b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
- sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
- sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
-
- subarray_out_wire = new Wire(Global, (g_ip->cl_vertical?subarray.area.w:subarray.area.h),1,1,inside_mat);//should be subarray.area.w; if with /2 means average length
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
-
- if (is_fa || pure_cam)
-
- { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
- driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
- cam_bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- if (!pure_cam)
- {
- //This is only used for fully asso not pure CAM
- driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- }
-
- else
- {
- driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
- bl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
- }
- double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
- double w_row_decoder = area_row_decoder / subarray.area.get_h();
-
- double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
- compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
-
- /* This means the subarray drivers are along the vertical (y) direction since / subarray.area.get_w() is used;
- * so the subarray_out_wire (actually the drivers) under the subarray and along the horizontal (x) direction
- * So as mentioned above @ line 271
- * subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//should be subarray.area.w
- * change the out_wire (driver to along y direction need carefully rethinking
- * rather than just simply switch w with h )
- * */
- double h_subarray_out_drv = subarray_out_wire->area.get_area() *
- (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
-
-
- h_subarray_out_drv *= (RWP + ERP + SCHP);
-
- double h_comparators = 0.0;
- double w_row_predecode_output_wires = 0.0;
- double h_bit_mux_dec_out_wires = 0.0;
- double h_senseamp_mux_dec_out_wires = 0.0;
-
- if ((!is_fa)&&(dp.is_tag))
- {
- //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
- h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
- h_comparators *= (RWP + ERP);
- }
-
- //power-gating circuit
- bool is_footer = false;
- double Isat_subarray = 2* simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, true);//only one wordline active in a subarray 2 means two inverters in an SRAM cell
- double detalV_array, deltaV_wl, deltaV_floatingBL;
- double c_wakeup_array;
-
- if (!(is_fa || pure_cam) && g_ip->power_gating)
- {//for SRAM only at this moment
- c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true);//1 inv
- c_wakeup_array += 2*drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true)
- + drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, true);//1 inv
- c_wakeup_array *= subarray.num_rows;//all the SRAM cells in a bitline is connected to the sleep tx to provide Vcc_min
- detalV_array = g_tp.sram_cell.Vdd-g_tp.sram_cell.Vcc_min;
-
- sram_sleep_tx = new Sleep_tx (g_ip->perfloss,
- Isat_subarray,
- is_footer,
- c_wakeup_array,
- detalV_array,
- 1,
- cell);
-
- subarray.area.set_h(subarray.area.h+ sram_sleep_tx->area.h);
-
- //TODO: add the sleep tx in the wl driver and
- }
-
-
- int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
- int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
- w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
-
- double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
- (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
- h_subarray_out_drv + h_comparators);
-
- double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
-
- if (deg_bl_muxing > 1)
- {
- h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_1 > 1)
- {
- h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
- if (dp.Ndsam_lev_2 > 1)
- {
- h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
- }
-
- double h_addr_datain_wires;
- if (!g_ip->ver_htree_wires_over_array)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
- (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
- if (is_fa || pure_cam)
- {
- h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
- (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
- g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
- (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
- }
- //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
- //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
- h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
- h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
- h_addr_datain_wires +
- h_bit_mux_dec_out_wires +
- h_senseamp_mux_dec_out_wires;
-
- }
-
- // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
- double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
- b_mux_predec_blk_drv1->area.get_area() +
- sa_mux_lev_1_predec_blk_drv1->area.get_area() +
- sa_mux_lev_2_predec_blk_drv1->area.get_area() +
- way_sel_drv1->area.get_area() +
- r_predec_blk_drv2->area.get_area() +
- b_mux_predec_blk_drv2->area.get_area() +
- sa_mux_lev_1_predec_blk_drv2->area.get_area() +
- sa_mux_lev_2_predec_blk_drv2->area.get_area() +
- r_predec_blk1->area.get_area() +
- b_mux_predec_blk1->area.get_area() +
- sa_mux_lev_1_predec_blk1->area.get_area() +
- sa_mux_lev_2_predec_blk1->area.get_area() +
- r_predec_blk2->area.get_area() +
- b_mux_predec_blk2->area.get_area() +
- sa_mux_lev_1_predec_blk2->area.get_area() +
- sa_mux_lev_2_predec_blk2->area.get_area() +
- bit_mux_dec->area.get_area() +
- sa_mux_lev_1_dec->area.get_area() +
- sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
-
- double area_efficiency_mat;
-
-// if (!is_fa)
-// {
- assert(num_subarrays_per_mat/num_subarrays_per_row>0);
- area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
- area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
- area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
- area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
-
-// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<0);
- assert(area.w>0);
-// }
-// else
-// {
-// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
-// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
-// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
-// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
-// }
- }
-
-
-
-Mat::~Mat()
-{
- delete row_dec;
- delete bit_mux_dec;
- delete sa_mux_lev_1_dec;
- delete sa_mux_lev_2_dec;
-
- delete r_predec->blk1;
- delete r_predec->blk2;
- delete b_mux_predec->blk1;
- delete b_mux_predec->blk2;
- delete sa_mux_lev_1_predec->blk1;
- delete sa_mux_lev_1_predec->blk2;
- delete sa_mux_lev_2_predec->blk1;
- delete sa_mux_lev_2_predec->blk2;
- delete dummy_way_sel_predec_blk1;
- delete dummy_way_sel_predec_blk2;
-
- delete r_predec->drv1;
- delete r_predec->drv2;
- delete b_mux_predec->drv1;
- delete b_mux_predec->drv2;
- delete sa_mux_lev_1_predec->drv1;
- delete sa_mux_lev_1_predec->drv2;
- delete sa_mux_lev_2_predec->drv1;
- delete sa_mux_lev_2_predec->drv2;
- delete way_sel_drv1;
- delete dummy_way_sel_predec_blk_drv2;
-
- delete r_predec;
- delete b_mux_predec;
- delete sa_mux_lev_1_predec;
- delete sa_mux_lev_2_predec;
-
- delete subarray_out_wire;
- if (!pure_cam)
- delete bl_precharge_eq_drv;
-
- if (is_fa || pure_cam)
- {
- delete sl_precharge_eq_drv ;
- delete sl_data_drv ;
- delete cam_bl_precharge_eq_drv;
- delete ml_precharge_drv;
- delete ml_to_ram_wl_drv;
- }
- if (sram_sleep_tx !=0)
- {
- delete sram_sleep_tx;
- }
-}
-
-
-
-double Mat::compute_delays(double inrisetime)
-{
- int k;
- double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
- double outrisetime_search, outrisetime, row_dec_outrisetime;
- // delay calculation for tags of fully associative cache
- if (is_fa || pure_cam)
- {
- //Compute search access time
- outrisetime_search = compute_cam_delay(inrisetime);
- if (is_fa)
- {
- bl_precharge_eq_drv->compute_delay(0);
- k = ml_to_ram_wl_drv->number_gates - 1;
- rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
- C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
- tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-
- R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
- R_bl = subarray.num_rows * r_b_metal;
- C_bl = subarray.C_bl;
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-
- outrisetime_search = compute_bitline_delay(outrisetime_search);
- outrisetime_search = compute_sa_delay(outrisetime_search);
- }
- outrisetime_search = compute_subarray_out_drv(outrisetime_search);
- subarray_out_wire->set_in_rise_time(outrisetime_search);
- outrisetime_search = subarray_out_wire->signal_rise_time();
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-
- //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- if (pure_cam)
- {
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- }
- return outrisetime_search;
- }
- else
- {
- bl_precharge_eq_drv->compute_delay(0);
- if (row_dec->exist == true)
- {
- int k = row_dec->num_gates - 1;
- double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
- // TODO: this 4*cell.h number must be revisited
- double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
- drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
- double C_ld = row_dec->C_ld_dec_out;
- double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
- delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
- }
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
- double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
- double R_bl = subarray.num_rows * r_b_metal;
- double C_bl = subarray.C_bl;
-
- if (is_dram)
- {
- delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- else
- {
- delay_bl_restore = bl_precharge_eq_drv->delay +
- log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
- (R_bl_precharge * C_bl + R_bl * C_bl / 2);
- }
- }
-
-
-
- outrisetime = r_predec->compute_delays(inrisetime);
- row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
- outrisetime = b_mux_predec->compute_delays(inrisetime);
- bit_mux_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
- sa_mux_lev_1_dec->compute_delays(outrisetime);
-
- outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
- sa_mux_lev_2_dec->compute_delays(outrisetime);
-
- outrisetime = compute_bitline_delay(row_dec_outrisetime);
- outrisetime = compute_sa_delay(outrisetime);
- outrisetime = compute_subarray_out_drv(outrisetime);
- subarray_out_wire->set_in_rise_time(outrisetime);
- outrisetime = subarray_out_wire->signal_rise_time();
-
- delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
- if (dp.is_tag == true && dp.fully_assoc == false)
- {
- compute_comparator_delay(0);
- }
-
- if (row_dec->exist == false)
- {
- delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
- }
-
-// cout<<"r_predec delay=" <delay<<" row_dec delay = " <delay< 1)
- {
- height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
- // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
- }
-
- height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
-
- if (dp.Ndsam_lev_1 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
- }
-
- if (dp.Ndsam_lev_2 > 1)
- {
- height += compute_tr_width_after_folding(
- g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
- //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-
- // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
- height += 2 * compute_tr_width_after_folding(
- pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
- }
-
- // TODO: this should be uncommented...
- /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
- {
- //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
- double width_write_driver_write_mux = width_write_driver_or_write_mux();
- double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
- cell.w *
- // deg_bl_muxing *
- dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
- height += height_write_driver_write_mux;
- }*/
-
- return height;
-}
-
-
-
-double Mat::compute_cam_delay(double inrisetime)
-{
-
- double out_time_ramp, this_delay;
- double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
-
-
- double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
- Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
- Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
- Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
-
- double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
- int Htagbits;
-
- double driver_c_gate_load;
- double driver_c_wire_load;
- double driver_r_wire_load;
- //double searchline_precharge_time;
-
- double leak_power_cc_inverters_sram_cell = 0;
- double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
- double leak_power_RD_port_sram_cell = 0;
- double leak_power_SCHP_port_sram_cell = 0;
- double leak_comparator_cam_cell =0;
-
- double gate_leak_comparator_cam_cell = 0;
- double gate_leak_power_cc_inverters_sram_cell = 0;
- double gate_leak_power_RD_port_sram_cell = 0;
- double gate_leak_power_SCHP_port_sram_cell = 0;
-
- c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
- c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
- r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
- r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
-
- dynSearchEng = 0.0;
- delay_matchchline = 0.0;
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
- bool linear_scaling = false;
-
- if (linear_scaling)
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
- }
- else
- {
- Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
- Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
- Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
- Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
- Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
- Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
- Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
- Wdummyn = g_tp.cam.cell_nmos_w;
- Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
- Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
- Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
- Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- W_hit_miss_n = Wdummyn;
- W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
- }
-
- Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
-
- /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
- search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
- From the driver(am and an) to the comparators in all the rows including the dummy row,
- Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
-
- //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
- //Searchline precharge routes horizontally
- driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
- driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
- driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-
- sl_precharge_eq_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
- //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
- sl_data_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- sl_precharge_eq_drv->compute_delay(0);
- double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
- double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
- double R_bl = (subarray.num_rows + 1) * r_b_metal;
- double C_bl = subarray.C_bl_cam;
- delay_cam_sl_restore = sl_precharge_eq_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
- out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
-
- //matchline ops delay
- delay_matchchline += sl_data_drv->delay;
-
- /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
- //matchline delay, matchline power, matchline_reset for cycle time computation,
-
- ////matchline precharge circuitry routes vertically
- //There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
- driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
- driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-
- ml_precharge_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
- ml_precharge_drv->compute_delay(0);
-
-
- rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
- c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
- + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
-
- Cwire = c_matchline_metal * Htagbits;
- Rwire = r_matchline_metal * Htagbits;
- c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
-
- double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_ml = Rwire;
- double C_ml = Cwire + c_intrinsic;
- delay_cam_ml_reset = ml_precharge_drv->delay
- + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
-
- //matchline ops delay
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
- delay_matchchline += this_delay;
- out_time_ramp = this_delay / VTHFA3;
-
- dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
- * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
-
- /* third stage, from the NAND2 gates to the drivers in the dummy row */
- rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
- c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
- c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
- out_time_ramp = this_delay / (1 - VTHFA4);
- delay_matchchline += this_delay;
-
- //only the dummy row has the extra inverter between NAND and NOR gates
- dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
-
- /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
- rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
- c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
- Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
- c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
- tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
- out_time_ramp = this_delay / VTHFA5;
- delay_matchchline += this_delay;
-
- dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
- /*final statge from the NOR gate to drive the wordline of the data portion */
-
- //searchline data driver There are two matchline precharge driver chains per subarray.
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
- driver_c_wire_load = subarray.C_wl_ram;
- driver_r_wire_load = subarray.R_wl_ram;
-
- ml_to_ram_wl_drv = new Driver(
- driver_c_gate_load,
- driver_c_wire_load,
- driver_r_wire_load,
- is_dram);
-
-
-
- rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
- c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
- tf = rd * (c_intrinsic + c_gate_load);
- this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
- out_time_ramp = this_delay / (1-0.5);
- delay_matchchline += this_delay;
-
- out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
-
- //c_gate_load energy is computed in ml_to_ram_wl_drv
- dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-
- /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
- /*Precharge the hitting logic */
- c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
- //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
- double R_hit_miss = Rwire;
- double C_hit_miss = Cwire + c_intrinsic;
- delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /*hitting logic evaluation */
- c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
- Cwire = c_searchline_metal * subarray.num_rows;
- Rwire = r_searchline_metal * subarray.num_rows;
- c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
- rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
- tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-
- delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
-
- if (is_fa)
- delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
-
- dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
- /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
-
- power_matchline.searchOp.dynamic = dynSearchEng;
-
- //leakage in one subarray
- double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
- double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
- double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
-
- leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
- leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
- leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
- leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
- leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
-
- power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
- leak_comparator_cam_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell +
- leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
- leak_power_RD_port_sram_cell * ERP +
- leak_power_SCHP_port_sram_cell*SCHP;
-// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
- power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
- power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
- //In idle states, the hit/miss txs are closed (on) therefore no Isub
- power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
- // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
-
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
- double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
-
- gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- gate_leak_power_SCHP_port_sram_cell = 0;
-
- //cout<<"power_matchline.searchOp.leakage"<array_power_gated? g_tp.sram_cell.Vcc_min : g_tp.sram_cell.Vdd);
-// leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd);
-// leak_power_RD_port_sram_cell = Iport_erp * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd);
-
- leak_power_cc_inverters_sram_cell_gated = Icell * g_tp.sram_cell.Vcc_min;
- leak_power_acc_tr_RW_or_WR_port_sram_cell_gated = Iport * g_tp.sram.Vbitfloating;
- leak_power_RD_port_sram_cell_gated = Iport_erp * g_tp.sram.Vbitfloating;
-//
-// leak_power_cc_inverters_sram_cell_gated = leak_power_cc_inverters_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram_cell.Vcc_min;
-// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = leak_power_acc_tr_RW_or_WR_port_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating;
-// leak_power_RD_port_sram_cell_floating = leak_power_RD_port_sram_cell_floating/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating;
-//
-
-
- //in idle state, Ig_on only possibly exist in access transistors of read only ports
- double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
- double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
-
- gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
- gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
- }
-
-
- double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
- double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
- double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
- double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
- double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-
- if (is_dram)
- {
- double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
- tstep = 2.3 * fraction * r_dev *
- (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
- (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
- delay_writeback = tstep;
- dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
- dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
- per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
- }
- else
- {
- double tau;
-
- if (deg_bl_muxing > 1)
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
- subarray.num_cols * num_subarrays_per_mat*/;
- blfloating_c += (C_bl + 2 * C_drain_bit_mux) * 2;
- dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
- blfloating_c += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *2;
- dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
- //Write Ops are differential for SRAM
-
- }
- else
- {
- tau = (R_cell_pull_down + R_cell_acc) *
- (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
- R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
- dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
- 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
-
- blfloating_c += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 2;
- dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
- num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
-
- }
- tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
-// cout<<"R_cell_pull_down ="<repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
- tf = rd * C_ld;
- this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
- delay_subarray_out_drv += this_delay;
- inrisetime = this_delay/(1.0 - 0.5);
- power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
- power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
- power_subarray_out_drv.readOp.power_gated_leakage += 0;
- power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-
-
- return inrisetime;
-}
-
-
-
-double Mat::compute_comparator_delay(double inrisetime)
-{
- int A = g_ip->tag_assoc;
-
- int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
- // a multiple of 4.
-
- /* First Inverter */
- double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
- double tf = Req*Ceq;
- double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
- double nextinputtime = st1del/VTHCOMPINV;
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-
- //For each degree of associativity
- //there are 4 such quarter comparators
- double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
- /* Second Inverter */
- Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
- tf = Req*Ceq;
- double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
- nextinputtime = st2del/(1.0-VTHCOMPINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-
- /* Third Inverter */
- Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
- drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
- Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
- tf = Req*Ceq;
- double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
- nextinputtime = st3del/(VTHEVALINV);
- power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-
- /* Final Inverter (virtual ground driver) discharging compare part */
- double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
- double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
- double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
- double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
- drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
- drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
- gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
- power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
- power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
- lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
-
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
- gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
-
- /* time to go to threshold of mux driver */
- double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
- /* take into account non-zero input rise time */
- double m = g_tp.peri_global.Vdd/nextinputtime;
- double Tcomparatorni;
-
- if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
- {
- double a = m;
- double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
- Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
- }
- else
- {
- Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
- }
- delay_comparator = Tcomparatorni+st1del+st2del+st3del;
- power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
- power_comparator.readOp.power_gated_leakage = lkgCurrent * g_tp.peri_global.Vcc_min;
- power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
-
- return Tcomparatorni / (1.0 - VTHMUXNAND);;
-}
-
-
-
-void Mat::compute_power_energy()
-{
- //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
- //when search all subarrays and all mats are fully active
- //when plain read/write only one subarray in a single mat is active.
-
- // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
- power.readOp.dynamic += r_predec->power.readOp.dynamic +
- b_mux_predec->power.readOp.dynamic +
- sa_mux_lev_1_predec->power.readOp.dynamic +
- sa_mux_lev_2_predec->power.readOp.dynamic;
-
- // add energy consumed in decoders
- power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
- if (!(is_fa||pure_cam))
- power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
-
- // add energy consumed in bitline prechagers, SAs, and bitlines
- if (!(is_fa||pure_cam))
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
-
- // add energy consumed in bitlines
- //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
- }
-
- else if (is_fa)
- {
- //for plain read/write only one subarray in a mat is active
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
- + cam_bl_precharge_eq_drv->power.readOp.dynamic;
- power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-
- //Add sense amps energy
- num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
- num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
- power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
- power_sa.readOp.dynamic *= num_sa_subarray;
-
-
- // add energy consumed in bitlines
- power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
- power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
- power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
-
- //Add subarray output energy
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
- //add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
- else
- {
- // add energy consumed in bitline prechagers
- power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
- //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
-
- //Add sense amps energy
- num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
- power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
- power_sa.searchOp.dynamic = 0;
-
- power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
- power_bitline.searchOp.dynamic = 0;
- power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
-
- power_subarray_out_drv.searchOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
- power_subarray_out_drv.readOp.dynamic =
- (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
- power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
- power_sa.readOp.dynamic +
- power_bitline.readOp.dynamic +
- power_subarray_out_drv.readOp.dynamic;
-
- power.readOp.dynamic += power_row_decoders.readOp.dynamic +
- bit_mux_dec->power.readOp.dynamic +
- sa_mux_lev_1_dec->power.readOp.dynamic +
- sa_mux_lev_2_dec->power.readOp.dynamic +
- power_comparator.readOp.dynamic;
-
-
- ////add energy consumed inside cam
- power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
- power_searchline_precharge = sl_precharge_eq_drv->power;
- power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
- power_searchline = sl_data_drv->power;
- power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
- power_matchline_precharge = ml_precharge_drv->power;
- power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
- power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
- power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
- power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
- power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
- power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
- //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
- }
-
-
- int number_output_drivers_subarray;
-// // calculate leakage power
- if (!(is_fa || pure_cam))
- {
- number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bitline.readOp.power_gated_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- //bl precharge drv is not power gated to turn off the precharge and equalization circuit (PMOS, thus turn-off signal is "1") for bitline floating
- power_bl_precharge_eq_drv.readOp.power_gated_leakage = bl_precharge_eq_drv->power.readOp.power_gated_leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power_subarray_out_drv.readOp.power_gated_leakage =
- (power_subarray_out_drv.readOp.power_gated_leakage + subarray_out_wire->power.readOp.power_gated_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- power.readOp.power_gated_leakage += power_bitline.readOp.power_gated_leakage +
- power_bl_precharge_eq_drv.readOp.power_gated_leakage +
- power_sa.readOp.power_gated_leakage +
- power_subarray_out_drv.readOp.power_gated_leakage;
-
- power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.leakage += power_comparator.readOp.leakage;
-
- power_comparator.readOp.power_gated_leakage *= num_do_b_mat * (RWP + ERP);
- power.readOp.power_gated_leakage += power_comparator.readOp.power_gated_leakage;
-
- array_leakage = power_bitline.readOp.leakage;
-
- cl_leakage =
- power_bl_precharge_eq_drv.readOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage +
- power_comparator.readOp.leakage;
-
-
-
- //Decoder blocks
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
-
- power_row_decoders.readOp.power_gated_leakage = row_dec->power.readOp.power_gated_leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.power_gated_leakage = bit_mux_dec->power.readOp.power_gated_leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.power_gated_leakage = sa_mux_lev_1_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.power_gated_leakage = sa_mux_lev_2_dec->power.readOp.power_gated_leakage * dp.Ndsam_lev_2;
-
-// if (!g_ip->wl_power_gated)
-// {
- power.readOp.leakage += r_predec->power.readOp.leakage +
- b_mux_predec->power.readOp.leakage +
- sa_mux_lev_1_predec->power.readOp.leakage +
- sa_mux_lev_2_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage +
- power_bit_mux_decoders.readOp.leakage +
- power_sa_mux_lev_1_decoders.readOp.leakage +
- power_sa_mux_lev_2_decoders.readOp.leakage;
-
- power.readOp.power_gated_leakage += r_predec->power.readOp.power_gated_leakage +
- b_mux_predec->power.readOp.power_gated_leakage +
- sa_mux_lev_1_predec->power.readOp.power_gated_leakage +
- sa_mux_lev_2_predec->power.readOp.power_gated_leakage +
- power_row_decoders.readOp.power_gated_leakage +
- power_bit_mux_decoders.readOp.power_gated_leakage +
- power_sa_mux_lev_1_decoders.readOp.power_gated_leakage +
- power_sa_mux_lev_2_decoders.readOp.power_gated_leakage;
-
-// }
-// else
-// {
-// power.readOp.power_gated_leakage += (r_predec->power.readOp.leakage +
-//
-// b_mux_predec->power.readOp.leakage +
-// sa_mux_lev_1_predec->power.readOp.leakage +
-// sa_mux_lev_2_predec->power.readOp.leakage +
-// power_row_decoders.readOp.leakage +
-// power_bit_mux_decoders.readOp.leakage +
-// power_sa_mux_lev_1_decoders.readOp.leakage +
-// power_sa_mux_lev_2_decoders.readOp.leakage)/g_tp.peri_global.Vdd*g_tp.peri_global.Vcc_min;
-
-// }
-
- wl_leakage = r_predec->power.readOp.leakage +
- b_mux_predec->power.readOp.leakage +
- sa_mux_lev_1_predec->power.readOp.leakage +
- sa_mux_lev_2_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage +
- power_bit_mux_decoders.readOp.leakage +
- power_sa_mux_lev_1_decoders.readOp.leakage +
- power_sa_mux_lev_2_decoders.readOp.leakage;
-
- //++++Below is gate leakage
- power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
- //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
- //cout<<"leakage"<power_gating)
- {
-
- //cout<<"leakage1"<area.get_area()*subarray.num_cols * num_subarrays_per_mat*dp.num_mats;
- array_wakeup_e.readOp.dynamic = sram_sleep_tx->wakeup_power.readOp.dynamic * num_subarrays_per_mat*subarray.num_cols*dp.num_act_mats_hor_dir;
- array_wakeup_t = sram_sleep_tx->wakeup_delay;
-
- wl_sleep_tx_area = (row_dec->exist ? row_dec->sleeptx->area.get_area() : 0)*subarray.num_rows * num_subarrays_per_mat*dp.num_mats
- + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->area.get_area() : 0)*dp.num_mats
- + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->area.get_area() : 0)*dp.num_mats
- + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->area.get_area() : 0)*dp.num_mats;
- wl_wakeup_e.readOp.dynamic = (row_dec->exist ? row_dec->sleeptx->wakeup_power.readOp.dynamic :0) * num_subarrays_per_mat*subarray.num_rows*dp.num_act_mats_hor_dir
- + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats
- + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats
- + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->wakeup_power.readOp.dynamic : 0)*dp.num_mats;
- wl_wakeup_t = (row_dec->exist ? row_dec->sleeptx->wakeup_delay : 0)
- + (bit_mux_dec->exist ? bit_mux_dec->sleeptx->wakeup_delay : 0)*dp.num_mats
- + (sa_mux_lev_1_dec->exist ? sa_mux_lev_1_dec->sleeptx->wakeup_delay : 0)*dp.num_mats
- + (sa_mux_lev_2_dec->exist ? sa_mux_lev_2_dec->sleeptx->wakeup_delay : 0)*dp.num_mats;;
- }
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
- power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
- power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
-
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- b_mux_predec->power.readOp.gate_leakage +
- sa_mux_lev_1_predec->power.readOp.gate_leakage +
- sa_mux_lev_2_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage +
- power_bit_mux_decoders.readOp.gate_leakage +
- power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- power_sa_mux_lev_2_decoders.readOp.gate_leakage;
- }
- else if (is_fa) //fully assoc
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.leakage += power_bitline.readOp.leakage +
- power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
-
- //cout<<"leakage5"<power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
-// cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
- //cout<<"leakage3"<power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
- power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
-
- //cout<<"leakage4"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
-
- //cout<<"leakage5"<power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
-
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
-
- }
- else //pure CAM
- {
- int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
- //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
- //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
- power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
-
- power_subarray_out_drv.readOp.leakage =
- (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.leakage += //power_bitline.readOp.leakage +
- //power_bl_precharge_eq_drv.readOp.leakage +
- power_bl_precharge_eq_drv.searchOp.leakage +
- power_sa.readOp.leakage +
- power_subarray_out_drv.readOp.leakage;
-
- // leakage power
- power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.leakage += r_predec->power.readOp.leakage +
- power_row_decoders.readOp.leakage;
-
- //inside cam
- power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
- power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
- power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
- //+++Below is gate leakage
- power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
- power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
-
- power_subarray_out_drv.readOp.gate_leakage =
- (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
- number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
- power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
- //power_bl_precharge_eq_drv.readOp.gate_leakage +
- power_bl_precharge_eq_drv.searchOp.gate_leakage +
- power_sa.readOp.gate_leakage +
- power_subarray_out_drv.readOp.gate_leakage;
-
- // gate_leakage power
- power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
- power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
- power_row_decoders.readOp.gate_leakage;
-
- //inside cam
- power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
- power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
- power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
- power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
-
- power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
- }
-}
-
diff --git a/cacti/mat.h b/cacti/mat.h
deleted file mode 100755
index b8465e4..0000000
--- a/cacti/mat.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#ifndef __MAT_H__
-#define __MAT_H__
-
-#include "component.h"
-#include "decoder.h"
-#include "wire.h"
-#include "subarray.h"
-#include "powergating.h"
-
-class Mat : public Component
-{
- public:
- Mat(const DynamicParameter & dyn_p);
- ~Mat();
- double compute_delays(double inrisetime); // return outrisetime
- void compute_power_energy();
-
- const DynamicParameter & dp;
-
- // TODO: clean up pointers and powerDefs below
- Decoder * row_dec;
- Decoder * bit_mux_dec;
- Decoder * sa_mux_lev_1_dec;
- Decoder * sa_mux_lev_2_dec;
- PredecBlk * dummy_way_sel_predec_blk1;
- PredecBlk * dummy_way_sel_predec_blk2;
- PredecBlkDrv * way_sel_drv1;
- PredecBlkDrv * dummy_way_sel_predec_blk_drv2;
-
- Predec * r_predec;
- Predec * b_mux_predec;
- Predec * sa_mux_lev_1_predec;
- Predec * sa_mux_lev_2_predec;
-
- Wire * subarray_out_wire;
- Driver * bl_precharge_eq_drv;
- Driver * cam_bl_precharge_eq_drv;//bitline pre-charge circuit is separated for CAM and RAM arrays.
- Driver * ml_precharge_drv;//matchline prechange driver
- Driver * sl_precharge_eq_drv;//searchline prechage driver
- Driver * sl_data_drv;//search line data driver
- Driver * ml_to_ram_wl_drv;//search line data driver
-
-
- powerDef power_row_decoders;
- powerDef power_bit_mux_decoders;
- powerDef power_sa_mux_lev_1_decoders;
- powerDef power_sa_mux_lev_2_decoders;
- powerDef power_fa_cam; // TODO: leakage power is not computed yet
- powerDef power_bl_precharge_eq_drv;
- powerDef power_subarray_out_drv;
- powerDef power_cam_all_active;
- powerDef power_searchline_precharge;
- powerDef power_matchline_precharge;
- powerDef power_ml_to_ram_wl_drv;
-
- double delay_fa_tag, delay_cam;
- double delay_before_decoder;
- double delay_bitline;
- double delay_wl_reset;
- double delay_bl_restore;
-
- double delay_searchline;
- double delay_matchchline;
- double delay_cam_sl_restore;
- double delay_cam_ml_reset;
- double delay_fa_ram_wl;
-
- double delay_hit_miss_reset;
- double delay_hit_miss;
-
- Subarray subarray;
- powerDef power_bitline, power_searchline, power_matchline, power_bitline_gated;
- double per_bitline_read_energy;
- int deg_bl_muxing;
- int num_act_mats_hor_dir;
- double delay_writeback;
- Area cell,cam_cell;
- bool is_dram,is_fa, pure_cam, camFlag;
- int num_mats;
- powerDef power_sa;
- double delay_sa;
- double leak_power_sense_amps_closed_page_state;
- double leak_power_sense_amps_open_page_state;
- double delay_subarray_out_drv;
- double delay_subarray_out_drv_htree;
- double delay_comparator;
- powerDef power_comparator;
- int num_do_b_mat;
- int num_so_b_mat;
- int num_sa_subarray;
- int num_sa_subarray_search;
- double C_bl;
-
- uint32_t num_subarrays_per_mat; // the number of subarrays in a mat
- uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
-
- double array_leakage;
- double wl_leakage;
- double cl_leakage;
-
- Sleep_tx * sram_sleep_tx;
- Sleep_tx * wl_sleep_tx;
- Sleep_tx * cl_sleep_tx;
-
- powerDef array_wakeup_e;
- double array_wakeup_t;
- double array_sleep_tx_area;
-
- powerDef blfloating_wakeup_e;
- double blfloating_wakeup_t;
- double blfloating_sleep_tx_area;
-
- powerDef wl_wakeup_e;
- double wl_wakeup_t;
- double wl_sleep_tx_area;
-
- powerDef cl_wakeup_e;
- double cl_wakeup_t;
- double cl_sleep_tx_area;
-
- private:
- double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
- double width_write_driver_or_write_mux();
- double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
- double compute_cam_delay(double inrisetime);
- double compute_bitline_delay(double inrisetime);
- double compute_sa_delay(double inrisetime);
- double compute_subarray_out_drv(double inrisetime);
- double compute_comparator_delay(double inrisetime);
-
- int RWP;
- int ERP;
- int EWP;
- int SCHP;
-};
-
-
-
-#endif
diff --git a/cacti/parameter.cc b/cacti/parameter.cc
deleted file mode 100644
index c4c4a92..0000000
--- a/cacti/parameter.cc
+++ /dev/null
@@ -1,714 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include
-#include
-#include
-
-#include "parameter.h"
-#include "area.h"
-
-using namespace std;
-
-
-InputParameter * g_ip;
-TechnologyParameter g_tp;
-
-
-
-void TechnologyParameter::DeviceType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
- cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl;
- cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl;
- cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl;
- cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl;
- cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl;
- cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl;
- cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl;
- cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl;
- cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl;
- cout << indent_str << "Vdd_default = " << setw(12) << Vdd_default << " V" << endl;
- cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl;
- cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl;
- cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl;
- cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl;
- cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl;
- cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl;
- cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl;
-}
-
-
-
-void TechnologyParameter::InterconnectType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
- cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl;
- cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
-}
-
-void TechnologyParameter::ScalingFactor::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
- cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
-}
-
-void TechnologyParameter::MemoryType::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
- cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl;
- cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl;
- cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl;
- cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl;
- cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl;
-}
-
-
-
-void TechnologyParameter::display(uint32_t indent)
-{
- string indent_str(indent, ' ');
-
- cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
- cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl;
- cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl;
- cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl;
- cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl;
- cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl;
- cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl;
- cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl;
- cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl;
- cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl;
- cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl;
- cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl;
- cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl;
- cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl;
- cout << endl;
- cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl;
- cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl;
- cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl;
- cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl;
- cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl;
- cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl;
- cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl;
- cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl;
- cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl;
- cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl;
- cout << endl;
- cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl;
- cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl;
- cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl;
- cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl;
- cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl;
- cout << endl;
- cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl;
- cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl;
- cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl;
- cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl;
- cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl;
- cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl;
- cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl;
-
- cout << endl;
- cout << indent_str << "SRAM cell transistor: " << endl;
- sram_cell.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM access transistor: " << endl;
- dram_acc.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM wordline transistor: " << endl;
- dram_wl.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "peripheral global transistor: " << endl;
- peri_global.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire local" << endl;
- wire_local.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire inside mat" << endl;
- wire_inside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "wire outside mat" << endl;
- wire_outside_mat.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "SRAM" << endl;
- sram.display(indent + 2);
-
- cout << endl;
- cout << indent_str << "DRAM" << endl;
- dram.display(indent + 2);
-}
-
-
-DynamicParameter::DynamicParameter():
- use_inp_params(0), cell(), is_valid(true)
-{
-}
-
-
-
-DynamicParameter::DynamicParameter(
- bool is_tag_,
- int pure_ram_,
- int pure_cam_,
- double Nspd_,
- unsigned int Ndwl_,
- unsigned int Ndbl_,
- unsigned int Ndcm_,
- unsigned int Ndsam_lev_1_,
- unsigned int Ndsam_lev_2_,
- bool is_main_mem_):
- is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
- Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
- number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
- is_main_mem(is_main_mem_), cell(), is_valid(false)
-{
- ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
-
- unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer
- const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
- fully_assoc = (g_ip->fully_assoc) ? true : false;
-
- if (fully_assoc || pure_cam)
- { // fully-assocative cache -- ref: CACTi 2.0 report
- if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
- Ndcm != 1 || //Ndcm is fixed to 1 for FA
- Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
- Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
- Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
- Ndbl < 2)
- {
- return;
- }
- }
-
- if ((is_dram) && (!is_tag) && (Ndcm > 1))
- {
- return; // For a DRAM array, each bitline has its own sense-amp
- }
-
- // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
- // at least two because an array is assumed to have at least one mat. And a mat
- // is formed out of two horizontal subarrays and two vertical subarrays
- if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
- {
- return;
- }
-
- //***********compute row, col of an subarray
- if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
- {
- // if data array, let tagbits = 0
- if (is_tag)
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
- _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks);
-
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
- //burst_length = 1;
- }
- else
- {
- num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
- g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
- num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
- // burst_length = g_ip->block_sz * 8 / g_ip->out_w;
- }
-
- if (num_r_subarray < MINSUBARRAYROWS) return;
- if (num_r_subarray == 0) return;
- if (num_r_subarray > MAXSUBARRAYROWS) return;
- if (num_c_subarray < MINSUBARRAYCOLS) return;
- if (num_c_subarray > MAXSUBARRAYCOLS) return;
-
- }
-
- else
- {//either fully-asso or cam
- if (pure_cam)
- {
- if (g_ip->specific_tag)
- {
- tagbits = int(ceil(g_ip->tag_w/8.0)*8);
- }
- else
- {
- tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8);
-// cout<<"Pure CAM needs tag width to be specified"<> 2) << 2);
-
- tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
- //tag_num_c_subarray = (int)(tagbits + EPSILON);
- tag_num_c_subarray = tagbits;
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- else //fully associative
- {
- if (g_ip->specific_tag)
- {
- tagbits = g_ip->tag_w;
- }
- else
- {
- tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
- }
- tagbits = (((tagbits + 3) >> 2) << 2);
-
- tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
- tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
- if (tag_num_r_subarray == 0) return;
- if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
- if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
- if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
-
- data_num_r_subarray = tag_num_r_subarray;
- data_num_c_subarray = 8 * g_ip->block_sz;
- if (data_num_r_subarray == 0) return;
- if (data_num_r_subarray > MAXSUBARRAYROWS) return;
- if (data_num_c_subarray < MINSUBARRAYCOLS) return;
- if (data_num_c_subarray > MAXSUBARRAYCOLS) return;
- num_r_subarray = tag_num_r_subarray;
- }
- }
-
- num_subarrays = Ndwl * Ndbl;
- //****************end of computation of row, col of an subarray
-
- // calculate wire parameters
- if (fully_assoc || pure_cam)
- {
- cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
- cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
-
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
- + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
- + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
- }
- else
- {
- if(is_tag)
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
- g_ip->num_wr_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
- wire_local.pitch * g_ip->num_se_rd_ports;
- }
- else
- {
- if (is_dram)
- {
- cell.h = g_tp.dram.b_h;
- cell.w = g_tp.dram.b_w;
- }
- else
- {
- cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
- g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
- cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
- (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +
- g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports;
- }
- }
- }
-
- double c_b_metal = cell.h * wire_local.C_per_um;
- double C_bl;
-
- if (!(fully_assoc || pure_cam))
- {
- if (is_dram)
- {
- deg_bl_muxing = 1;
- if (ram_cell_tech_type == comm_dram)
- {
- C_bl = num_r_subarray * c_b_metal;
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
- if (V_b_sense < VBITSENSEMIN)
- {
- return;
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- dram_refresh_period = 64e-3;
- }
- else
- {
- double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
-
- if (V_b_sense < VBITSENSEMIN)
- {
- return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
- }
- V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
- //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C;
- //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
- dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
- }
- }
- else
- { //SRAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = Ndcm;
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0;
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
- }
- else
- {
- c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
- V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
- deg_bl_muxing = 1;//FA fix as 1
- // "/ 2.0" below is due to the fact that two adjacent access transistors share drain
- // contacts in a physical layout
- double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines
- C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
- dram_refresh_period = 0;
- }
-
-
- // do/di: data in/out, for fully associative they are the data width for normal read and write
- // so/si: search data in/out, for fully associative they are the data width for the search ops
- // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
- // so/si needs broadcase while do/di do not
-
- if (fully_assoc || pure_cam)
- {
- switch (Ndbl) {
- case (0):
- cout << " Invalid Ndbl \n"<int_prefetch_w * g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
- if (g_ip->fast_access == true)
- {
- num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- }
- else
- {
-
- num_do_b_subbank = g_ip->out_w;
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
- if (deg_sa_mux_l1_non_assoc < 1)
- {
- return;
- }
-
- }
- }
- }
- else
- {
- num_do_b_subbank = tagbits * g_ip->tag_assoc;
- if (num_do_b_mat < tagbits)
- {
- return;
- }
- deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
- //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
- num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
- }
- else
- {
- num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
- num_do_b_subbank = tag_num_c_subarray;
- }
-
- deg_sa_mux_l1_non_assoc = 1;
- }
-
- deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
-
- if (fully_assoc || pure_cam)
- {
- num_act_mats_hor_dir = 1;
- num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
- }
- else
- {
- num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
- if (num_act_mats_hor_dir == 0)
- {
- return;
- }
- }
-
- //compute num_do_mat for tag
- if (is_tag)
- {
- if (!(fully_assoc || pure_cam))
- {
- num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
- num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
- }
- }
-
- if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
- {
- if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
- {
- return;
- }
- }
-
-// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
- if (is_tag == false && g_ip->is_main_mem == true &&
- num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
- {
- return;
- }
-
- if (num_act_mats_hor_dir > num_mats_h_dir)
- {
- return;
- }
-
-
- //compute di for mat subbank and bank
- if (!(fully_assoc ||pure_cam))
- {
- if(!is_tag)
- {
- if(g_ip->fast_access == true)
- {
- num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
- }
- else
- {
- num_di_b_mat = num_do_b_mat;
- }
- }
- else
- {
- num_di_b_mat = tagbits;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_mat = num_do_b_mat;
- //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
- //but inside the mat wire tracks need to be reserved for search data bus
- num_si_b_mat = tagbits;
- }
- else
- {
- num_di_b_mat = tagbits;
- num_si_b_mat = tagbits;//*num_subarrays/num_mats;
- }
-
- }
-
- num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA
- num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast
-
- int num_addr_b_row_dec = _log2(num_r_subarray);
- if ((fully_assoc ||pure_cam))
- num_addr_b_row_dec +=_log2(num_subarrays/num_mats);
- int number_subbanks = num_mats / num_act_mats_hor_dir;
- number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM
-
- num_rw_ports = g_ip->num_rw_ports;
- num_rd_ports = g_ip->num_rd_ports;
- num_wr_ports = g_ip->num_wr_ports;
- num_se_rd_ports = g_ip->num_se_rd_ports;
- num_search_ports = g_ip->num_search_ports;
-
- if (is_dram && is_main_mem)
- {
- number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
- _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
- }
- else
- {
- number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
- _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
- }
-
- if (!(fully_assoc ||pure_cam))
- {
- if (is_tag)
- {
- num_di_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->data_assoc;
- }
- else
- {
- num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
- num_do_b_bank_per_port = g_ip->out_w;
- }
- }
- else
- {
- if (fully_assoc)
- {
- num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = g_ip->out_w + tagbits;
- num_so_b_bank_per_port = g_ip->out_w;
- }
- else
- {
- num_di_b_bank_per_port = tagbits;
- num_si_b_bank_per_port = tagbits;
- num_do_b_bank_per_port = tagbits;
- num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));
- }
- }
-
- if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
- {
- number_way_select_signals_mat = g_ip->data_assoc;
- }
-
- // add ECC adjustment to all data signals that traverse on H-trees.
- if (g_ip->add_ecc_b_ == true)
- {
- num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
- num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
- num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
- num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_));
- num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_));
- num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_));
-
- num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_));
- num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_));
- num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_));
- num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_));
- num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_));
- num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_));
- }
-
- is_valid = true;
-}
-
diff --git a/cacti/powergating.cc b/cacti/powergating.cc
deleted file mode 100644
index 8141927..0000000
--- a/cacti/powergating.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "area.h"
-#include "powergating.h"
-#include "parameter.h"
-#include
-#include
-#include
-
-using namespace std;
-
-/*
- * Sizing of sleep tx is independent of sleep/power-saving supply voltage, sleep/power-saving supply voltage only affects wake-up energy and time
- *
- * While using DSTN (Distributed sleep tx network), worst case sizing is used.
- * For DSTN, the network can help to reduce the runtime latency (or achieve the same latency with smaller transistor size)
- * For example, during write access, if not all bits are toggled, the sleep tx in the non-toggled path can work as the extra
- * discharge paths of all the toggled bits, in addition to the sleep tx in the bitlines with the toggled bits. Since CACTI itself
- * assumes worst case with all bits toggled, sleep txs are assumed to work all the time with all bits toggled,
- * Therefore, although DTSN is used, for memory array, the number of sleep txs is related to the number of rows and cols.,
- * and all calculations are still base on single sleep tx for each discharge case. Of couse in each discharge path, the sleep
- * tx is the charge path of all the devices in the same path (row or col).
- *
- * Even in the worse case sizing, the wakeup time will not change
- * since all paths need to charge/discharge---each sleep tx is just do its own portion of the work during wakeup or entering sleep state.
- *
- * Power-gating and DVS cannot happen at the same time! Because power-gating happens when circuit is idle,
- * while DVS happens when circuit is active.
- * When waking up from power-gating status, it is assumed that the system will first wakeup to DVS0 (full speed) state, if DVS is enabled in
- * the system.
- *
- *
- *
-*/
-Sleep_tx::Sleep_tx(
- double _perf_with_sleep_tx,
- double _active_Isat,//of circuit block, not sleep tx
- bool _is_footer,
- double _c_circuit_wakeup,
- double _V_delta,
- int _num_sleep_tx,
-// double _vt_circuit,
-// double _vt_sleep_tx,
-// double _mobility,//of sleep tx
-// double _c_ox,//of sleep tx
- const Area & cell_)
-:perf_with_sleep_tx(_perf_with_sleep_tx),
- active_Isat(_active_Isat),
- is_footer(_is_footer),
- c_circuit_wakeup(_c_circuit_wakeup),
- V_delta(_V_delta),
- num_sleep_tx(_num_sleep_tx),
-// vt_circuit(_vt_circuit),
-// vt_sleep_tx(_vt_sleep_tx),
-// mobility(_mobility),
-// c_ox(_c_ox)
- cell(cell_),
- is_sleep_tx(true)
-{
-
- //a single sleep tx in a network
- double raw_area, raw_width, raw_hight;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
- vdd = g_tp.peri_global.Vdd;
- vt_circuit = g_tp.peri_global.Vth;
- vt_sleep_tx = g_tp.sleep_tx.Vth;
- mobility = g_tp.sleep_tx.Mobility_n;
- c_ox = g_tp.sleep_tx.C_ox;
-
- width = active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers
- width /= num_sleep_tx;
-
-// double cell_hight = MAX(cell.w*2, g_tp.cell_h_def);
- raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.h)/2; //Only single device, assuming device is laid on the side of the circuit block without changing the height of the standard library cells (using the standard cell approach).
- raw_width = cell.w;
- raw_hight = raw_area/cell.w;
- area.set_h(raw_hight);
- area.set_w(raw_width);
-
- compute_penalty();
-
-}
-
-double Sleep_tx::compute_penalty()
-{
- //V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although it might be OK to use sleep tx to control the V_delta
- double c_load;
- double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
-
- if (is_footer)
- {
- c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx);
-// V_delta = _V_delta;
- wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);
- wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
- //no 0.5 because the half of the energy spend in entering sleep and half of the energy will be spent in waking up. And they are pairs
- }
- else
- {
- c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx);
-// V_delta = _V_delta;
- wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);
- wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
- }
-
-/*
- The number of cycles in the wake-up latency set the constraint on the
- minimum number of idle clock cycles needed before a processor
- can enter in the corresponding sleep mode without any wakeup
- overhead.
-
- If the circuit is half way to sleep then waken up, it is still OK
- just the wakeup latency will be shorter than the wakeup time from full asleep.
- So, the sleep time and energy does not matter
-*/
-
-}
-
diff --git a/cacti/router.cc b/cacti/router.cc
deleted file mode 100644
index b8c22d3..0000000
--- a/cacti/router.cc
+++ /dev/null
@@ -1,311 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include "router.h"
-
-Router::Router(
- double flit_size_,
- double vc_buf, /* vc size = vc_buffer_size * flit_size */
- double vc_c,
- TechnologyParameter::DeviceType *dt,
- double I_,
- double O_,
- double M_
- ):flit_size(flit_size_),
- deviceType(dt),
- I(I_),
- O(O_),
- M(M_)
-{
- vc_buffer_size = vc_buf;
- vc_count = vc_c;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- double technology = g_ip->F_sz_um;
-
- Vdd = dt->Vdd;
-
- /*Crossbar parameters. Transmisson gate is employed for connector*/
- NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
- PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
- wt = 15*technology*1e-6/2; /*track width*/
- ht = 15*technology*1e-6/2; /*track height*/
-// I = 5; /*Number of crossbar input ports*/
-// O = 5; /*Number of crossbar output ports*/
- NTi = 12.5*technology*1e-6/2;
- PTi = 25*technology*1e-6/2;
-
- NTid = 60*technology*1e-6/2; //m
- PTid = 120*technology*1e-6/2; // m
- NTod = 60*technology*1e-6/2; // m
- PTod = 120*technology*1e-6/2; // m
-
- calc_router_parameters();
-}
-
-Router::~Router(){}
-
-
-double //wire cap with triple spacing
-Router::Cw3(double length) {
- Wire wc(g_ip->wt, length, 1, 3, 3);
- return (wc.wire_cap(length));
-}
-
-/*Function to calculate the gate capacitance*/
-double
-Router::gate_cap(double w) {
- return (double) gate_C (w*1e6 /*u*/, 0);
-}
-
-/*Function to calculate the diffusion capacitance*/
-double
-Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
- double s /*number of stacking transistors*/) {
- return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
-}
-
-
-/*crossbar related functions */
-
-// Model for simple transmission gate
-double
-Router::transmission_buf_inpcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
-}
-
-double
-Router::transmission_buf_outcap() {
- return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
-}
-
-double
-Router::transmission_buf_ctrcap() {
- return gate_cap(NTtr)+gate_cap(PTtr);
-}
-
-double
-Router::crossbar_inpline() {
- return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
- gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
-}
-
-double
-Router::crossbar_outline() {
- return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
- gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
-}
-
-double
-Router::crossbar_ctrline() {
- return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
- diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
- gate_cap(NTi) + gate_cap(PTi));
-}
-
-double
-Router::tr_crossbar_power() {
- return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
- crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
-}
-
-void Router::buffer_stats()
-{
- DynamicParameter dyn_p;
- dyn_p.is_tag = false;
- dyn_p.pure_cam = false;
- dyn_p.fully_assoc = false;
- dyn_p.pure_ram = true;
- dyn_p.is_dram = false;
- dyn_p.is_main_mem = false;
- dyn_p.num_subarrays = 1;
- dyn_p.num_mats = 1;
- dyn_p.Ndbl = 1;
- dyn_p.Ndwl = 1;
- dyn_p.Nspd = 1;
- dyn_p.deg_bl_muxing = 1;
- dyn_p.deg_senseamp_muxing_non_associativity = 1;
- dyn_p.Ndsam_lev_1 = 1;
- dyn_p.Ndsam_lev_2 = 1;
- dyn_p.Ndcm = 1;
- dyn_p.number_addr_bits_mat = 8;
- dyn_p.number_way_select_signals_mat = 1;
- dyn_p.number_subbanks_decode = 0;
- dyn_p.num_act_mats_hor_dir = 1;
- dyn_p.V_b_sense = Vdd; // FIXME check power calc.
- dyn_p.ram_cell_tech_type = 0;
- dyn_p.num_r_subarray = (int) vc_buffer_size;
- dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
- dyn_p.num_mats_h_dir = 1;
- dyn_p.num_mats_v_dir = 1;
- dyn_p.num_do_b_subbank = (int)flit_size;
- dyn_p.num_di_b_subbank = (int)flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_mat = (int) flit_size;
- dyn_p.num_di_b_mat = (int) flit_size;
- dyn_p.num_do_b_bank_per_port = (int) flit_size;
- dyn_p.num_di_b_bank_per_port = (int) flit_size;
- dyn_p.out_w = (int) flit_size;
-
- dyn_p.use_inp_params = 1;
- dyn_p.num_wr_ports = (unsigned int) vc_count;
- dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
- dyn_p.num_rw_ports = 0;
- dyn_p.num_se_rd_ports =0;
- dyn_p.num_search_ports =0;
-
-
-
- dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
- dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
- dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
- (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
- dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
-
- Mat buff(dyn_p);
- buff.compute_delays(0);
- buff.compute_power_energy();
- buffer.power.readOp = buff.power.readOp;
- buffer.power.writeOp = buffer.power.readOp; //FIXME
- buffer.area = buff.area;
-}
-
-
-
- void
-Router::cb_stats ()
-{
- if (1) {
- Crossbar c_b(I, O, flit_size);
- c_b.compute_power();
- crossbar.delay = c_b.delay;
- crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
- crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
- crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
- crossbar.area = c_b.area;
-// c_b.print_crossbar();
- }
- else {
- crossbar.power.readOp.dynamic = tr_crossbar_power();
- crossbar.power.readOp.leakage = flit_size * I * O *
- cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- crossbar.power.readOp.gate_leakage = flit_size * I * O *
- cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
- }
-}
-
-void
-Router::get_router_power()
-{
- /* calculate buffer stats */
- buffer_stats();
-
- /* calculate cross-bar stats */
- cb_stats();
-
- /* calculate arbiter stats */
- Arbiter vcarb(vc_count, flit_size, buffer.area.w);
- Arbiter cbarb(I, flit_size, crossbar.area.w);
- vcarb.compute_power();
- cbarb.compute_power();
- arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
- cbarb.power.readOp.dynamic * O;
- arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
- cbarb.power.readOp.leakage * O;
- arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
- cbarb.power.readOp.gate_leakage * O;
-
-// arb_stats();
- power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
- crossbar.power.readOp.dynamic +
- arbiter.power.readOp.dynamic)*MIN(I, O)*M;
- double pppm_t[4] = {1,I,I,1};
- power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
-
-}
-
- void
-Router::get_router_delay ()
-{
- FREQUENCY=5; // move this to config file --TODO
- cycle_time = (1/(double)FREQUENCY)*1e3; //ps
- delay = 4;
- max_cyc = 17 * g_tp.FO4; //s
- max_cyc *= 1e12; //ps
- if (cycle_time < max_cyc) {
- FREQUENCY = (1/max_cyc)*1e3; //GHz
- }
-}
-
- void
-Router::get_router_area()
-{
- area.h = I*buffer.area.h;
- area.w = buffer.area.w+crossbar.area.w;
-}
-
- void
-Router::calc_router_parameters()
-{
- /* calculate router frequency and pipeline cycles */
- get_router_delay();
-
- /* router power stats */
- get_router_power();
-
- /* area stats */
- get_router_area();
-}
-
- void
-Router::print_router()
-{
- cout << "\n\nRouter stats:\n";
- cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
- cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
- cout << "\tNo. of Virtual channels - " << vc_count << "\n";
- cout << "\tNo. of pipeline stages - " << delay << endl;
- cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
- cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
- cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
- cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
- cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
- cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
- cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
- cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<
-#include
-#include "basic_circuit.h"
-#include "cacti_interface.h"
-#include "component.h"
-#include "mat.h"
-#include "parameter.h"
-#include "wire.h"
-#include "crossbar.h"
-#include "arbiter.h"
-
-
-
-class Router : public Component
-{
- public:
- Router(
- double flit_size_,
- double vc_buf, /* vc size = vc_buffer_size * flit_size */
- double vc_count,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global),
- double I_ = 5,
- double O_ = 5,
- double M_ = 0.6);
- ~Router();
-
-
- void print_router();
-
- Component arbiter, crossbar, buffer;
-
- double cycle_time, max_cyc;
- double flit_size;
- double vc_count;
- double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
-
- private:
- TechnologyParameter::DeviceType *deviceType;
- double FREQUENCY; // move this to config file --TODO
- double Cw3(double len);
- double gate_cap(double w);
- double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
- enum Wire_type wtype;
- enum Wire_placement wire_placement;
- //corssbar
- double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2;
- double M; //network load
- double transmission_buf_inpcap();
- double transmission_buf_outcap();
- double transmission_buf_ctrcap();
- double crossbar_inpline();
- double crossbar_outline();
- double crossbar_ctrline();
- double tr_crossbar_power();
- void cb_stats ();
- double arb_power();
- void arb_stats ();
- double buffer_params();
- void buffer_stats();
-
-
- //arbiter
-
- //buffer
-
- //router params
- double Vdd;
-
- void calc_router_parameters();
- void get_router_area();
- void get_router_power();
- void get_router_delay();
-
- double min_w_pmos;
-
-
-};
-
-#endif
diff --git a/cacti/subarray.cc b/cacti/subarray.cc
deleted file mode 100755
index ef5737d..0000000
--- a/cacti/subarray.cc
+++ /dev/null
@@ -1,197 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-
-#include
-#include
-#include
-
-#include "subarray.h"
-
-
-Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
- dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
- num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
- cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
-{
- //num_cols=7;
- //cout<<"num_cols ="<< num_cols <add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
- uint32_t ram_num_cells_wl_stitching =
- (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
- (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
-
- area.h = cell.h * num_rows;
-
- area.w = cell.w * num_cols +
- ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
- }
- else //cam fa
- {
-
- //should not add dummy row here since the dummy row do not need decoder
- if (is_fa)// fully associative cache
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
- num_cols = num_cols_fa_cam + num_cols_fa_ram;
- }
- else
- {
- num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
- num_cols_fa_ram = 0;
- num_cols = num_cols_fa_cam;
- }
-
- area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
- area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
- + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
- + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
- + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
- }
-
- assert(area.h>0);
- assert(area.w>0);
- compute_C();
-}
-
-
-
-Subarray::~Subarray()
-{
-}
-
-
-
-double Subarray::get_total_cell_area()
-{
-// return (is_fa==false? cell.get_area() * num_rows * num_cols
-// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
-// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
-// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
-
- if (!(is_fa || dp.pure_cam))
- return (cell.get_area() * num_rows * num_cols);
- else if (is_fa)
- { //for FA, this area includes the dummy cells in SRAM arrays.
- //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
- //cout<<"diff" <
-#include "basic_circuit.h"
-
-#include "parameter.h"
-
-double wire_resistance(double resistivity, double wire_width, double wire_thickness,
- double barrier_thickness, double dishing_thickness, double alpha_scatter)
-{
- double resistance;
- resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
- return(resistance);
-}
-
-double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
- double ild_thickness, double miller_value, double horiz_dielectric_constant,
- double vert_dielectric_constant, double fringe_cap)
-{
- double vertical_cap, sidewall_cap, total_cap;
- vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
- sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
- total_cap = vertical_cap + sidewall_cap + fringe_cap;
- return(total_cap);
-}
-
-
-void init_tech_params(double technology, bool is_tag)
-{
- int iter, tech, tech_lo, tech_hi;
- double curr_alpha, curr_vpp;
- double wire_width, wire_thickness, wire_spacing,
- fringe_cap, pmos_to_nmos_sizing_r;
-// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant;
- double barrier_thickness, dishing_thickness, alpha_scatter;
- double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell;
-
- uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
- uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type;
-
- technology = technology * 1000.0; // in the unit of nm
-
- // initialize parameters
- g_tp.reset();
- double gmp_to_gmn_multiplier_periph_global = 0;
-
- double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram,
- curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram,
- curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram,
- curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp;
- double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data
- curr_asp_ratio_cell_cam;
- double SENSE_AMP_D, SENSE_AMP_P; // J
- double area_cell_dram = 0;
- double asp_ratio_cell_dram = 0;
- double area_cell_sram = 0;
- double asp_ratio_cell_sram = 0;
- double area_cell_cam = 0;
- double asp_ratio_cell_cam = 0;
- double mobility_eff_periph_global = 0;
- double Vdsat_periph_global = 0;
- double nmos_effective_resistance_multiplier;
- double width_dram_access_transistor;
-
- double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date
- double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn
- double curr_chip_layout_overhead = 0;
- double curr_macro_layout_overhead = 0;
- double curr_sckt_co_eff = 0;
-
- if (technology < 181 && technology > 179)
- {
- tech_lo = 180;
- tech_hi = 180;
- }
- else if (technology < 91 && technology > 89)
- {
- tech_lo = 90;
- tech_hi = 90;
- }
- else if (technology < 66 && technology > 64)
- {
- tech_lo = 65;
- tech_hi = 65;
- }
- else if (technology < 46 && technology > 44)
- {
- tech_lo = 45;
- tech_hi = 45;
- }
- else if (technology < 33 && technology > 31)
- {
- tech_lo = 32;
- tech_hi = 32;
- }
- else if (technology < 23 && technology > 21)
- {
- tech_lo = 22;
- tech_hi = 22;
- if (ram_cell_tech_type == 3 )
- {
- cout<<"current version does not support eDRAM technologies at 22nm"< 15)
-// {
-// tech_lo = 16;
-// tech_hi = 16;
-// }
- else if (technology < 180 && technology > 90)
- {
- tech_lo = 180;
- tech_hi = 90;
- }
- else if (technology < 90 && technology > 65)
- {
- tech_lo = 90;
- tech_hi = 65;
- }
- else if (technology < 65 && technology > 45)
- {
- tech_lo = 65;
- tech_hi = 45;
- }
- else if (technology < 45 && technology > 32)
- {
- tech_lo = 45;
- tech_hi = 32;
- }
- else if (technology < 32 && technology > 22)
- {
- tech_lo = 32;
- tech_hi = 22;
- }
-// else if (technology < 22 && technology > 16)
-// {
-// tech_lo = 22;
-// tech_hi = 16;
-// }
- else
- {
- cout<<"Invalid technology nodes"<specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];
- alpha_power_law[0]=1.4;
- Lphy[0] = 0.12;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3*(Aggre_proj? 1.9/1.2:2);//micron
- v_th[0] = Aggre_proj? 0.36 : 0.4407;//V
- c_ox[0] = 1.79e-14*(Aggre_proj? 1.9/1.2:2);//F/micron2
- mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128*2; //V
- c_g_ideal[0] = (Aggre_proj? 1.9/1.2:2)*6.64e-16;//F/micron
- c_fringe[0] = (Aggre_proj? 1.9/1.2:2)*0.08e-15;//F/micron
- c_junc[0] = (Aggre_proj? 1.9/1.2:2)*1e-15;//F/micron2
- I_on_n[0] = 750e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron
- I_on_p[0] = 350e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 7e-10;//A/micron
- I_off_n[0][10] = 8.26e-10;
- I_off_n[0][20] = 9.74e-10;
- I_off_n[0][30] = 1.15e-9;
- I_off_n[0][40] = 1.35e-9;
- I_off_n[0][50] = 1.60e-9;
- I_off_n[0][60] = 1.88e-9;
- I_off_n[0][70] = 2.29e-9;
- I_off_n[0][80] = 2.70e-9;
- I_off_n[0][90] = 3.19e-9;
- I_off_n[0][100] = 3.76e-9;
-
- I_g_on_n[0][0] = 1.65e-10;//A/micron
- I_g_on_n[0][10] = 1.65e-10;
- I_g_on_n[0][20] = 1.65e-10;
- I_g_on_n[0][30] = 1.65e-10;
- I_g_on_n[0][40] = 1.65e-10;
- I_g_on_n[0][50] = 1.65e-10;
- I_g_on_n[0][60] = 1.65e-10;
- I_g_on_n[0][70] = 1.65e-10;
- I_g_on_n[0][80] = 1.65e-10;
- I_g_on_n[0][90] = 1.65e-10;
- I_g_on_n[0][100] = 1.65e-10;
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm
- curr_core_tx_density = 1.25*0.7*0.7*0.4;
- curr_sckt_co_eff = 1.11;
- curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb
-
- }
-
- if (tech == 90)
- {
- SENSE_AMP_D = .28e-9; // s
- SENSE_AMP_P = 14.7e-15; // J
- //90nm technology-node. Corresponds to year 2004 in ITRS
- //ITRS HP device type
- vdd[0] = 1.2;
- vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];
- alpha_power_law[0]=1.34;
- Lphy[0] = 0.037;//Lphy is the physical gate-length. micron
- Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron
- t_ox[0] = 1.2e-3;//micron
- v_th[0] = 0.23707;//V
- c_ox[0] = 1.79e-14;//F/micron2
- mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 0.128; //V
- c_g_ideal[0] = 6.64e-16;//F/micron
- c_fringe[0] = 0.08e-15;//F/micron
- c_junc[0] = 1e-15;//F/micron2
- I_on_n[0] = 1076.9e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron with ap-law applied for dvs and arbitrary vdd
- I_on_p[0] = 712.6e-6;//A/micron
- //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline
- nmos_effective_resistance_multiplier = 1.54;
- n_to_p_eff_curr_drv_ratio[0] = 2.45;
- gmp_to_gmn_multiplier[0] = 1.22;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1;
- I_off_n[0][0] = 3.24e-8*pow(vdd_real[0]/(vdd[0]),4);//A/micron
- I_off_n[0][10] = 4.01e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][20] = 4.90e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][30] = 5.92e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][40] = 7.08e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][50] = 8.38e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][60] = 9.82e-8*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][70] = 1.14e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][80] = 1.29e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][90] = 1.43e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][100] = 1.54e-7*pow(vdd_real[0]/(vdd[0]),4);
-
- I_g_on_n[0][0] = 1.65e-8;//A/micron
- I_g_on_n[0][10] = 1.65e-8;
- I_g_on_n[0][20] = 1.65e-8;
- I_g_on_n[0][30] = 1.65e-8;
- I_g_on_n[0][40] = 1.65e-8;
- I_g_on_n[0][50] = 1.65e-8;
- I_g_on_n[0][60] = 1.65e-8;
- I_g_on_n[0][70] = 1.65e-8;
- I_g_on_n[0][80] = 1.65e-8;
- I_g_on_n[0][90] = 1.65e-8;
- I_g_on_n[0][100] = 1.65e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.3;
- vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];
- alpha_power_law[1]=1.47;
- Lphy[1] = 0.075;
- Lelec[1] = 0.0486;
- t_ox[1] = 2.2e-3;
- v_th[1] = 0.48203;
- c_ox[1] = 1.22e-14;
- mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.373;
- c_g_ideal[1] = 9.15e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 503.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);
- I_on_p[1] = 235.1e-6;
- nmos_effective_resistance_multiplier = 1.92;
- n_to_p_eff_curr_drv_ratio[1] = 2.44;
- gmp_to_gmn_multiplier[1] =0.88;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1;
- I_off_n[1][0] = 2.81e-12*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][10] = 4.76e-12*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][20] = 7.82e-12*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][30] = 1.25e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][40] = 1.94e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][50] = 2.94e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][60] = 4.36e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][70] = 6.32e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][80] = 8.95e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][90] = 1.25e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][100] = 1.7e-10*pow(vdd_real[1]/(vdd[1]),4);
-
- I_g_on_n[1][0] = 3.87e-11;//A/micron
- I_g_on_n[1][10] = 3.87e-11;
- I_g_on_n[1][20] = 3.87e-11;
- I_g_on_n[1][30] = 3.87e-11;
- I_g_on_n[1][40] = 3.87e-11;
- I_g_on_n[1][50] = 3.87e-11;
- I_g_on_n[1][60] = 3.87e-11;
- I_g_on_n[1][70] = 3.87e-11;
- I_g_on_n[1][80] = 3.87e-11;
- I_g_on_n[1][90] = 3.87e-11;
- I_g_on_n[1][100] = 3.87e-11;
-
- //ITRS LOP device type
- vdd[2] = 0.9;
- vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];
- alpha_power_law[2]=1.55;
- Lphy[2] = 0.053;
- Lelec[2] = 0.0354;
- t_ox[2] = 1.5e-3;
- v_th[2] = 0.30764;
- c_ox[2] = 1.59e-14;
- mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.113;
- c_g_ideal[2] = 8.45e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 386.6e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);
- I_on_p[2] = 209.7e-6;
- nmos_effective_resistance_multiplier = 1.77;
- n_to_p_eff_curr_drv_ratio[2] = 2.54;
- gmp_to_gmn_multiplier[2] = 0.98;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1;
- I_off_n[2][0] = 2.14e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][10] = 2.9e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][20] = 3.87e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][30] = 5.07e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][40] = 6.54e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][50] = 8.27e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][60] = 1.02e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][70] = 1.20e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][80] = 1.36e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][90] = 1.52e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][100] = 1.73e-8*pow(vdd_real[2]/(vdd[2]),5);
-
- I_g_on_n[2][0] = 4.31e-8;//A/micron
- I_g_on_n[2][10] = 4.31e-8;
- I_g_on_n[2][20] = 4.31e-8;
- I_g_on_n[2][30] = 4.31e-8;
- I_g_on_n[2][40] = 4.31e-8;
- I_g_on_n[2][50] = 4.31e-8;
- I_g_on_n[2][60] = 4.31e-8;
- I_g_on_n[2][70] = 4.31e-8;
- I_g_on_n[2][80] = 4.31e-8;
- I_g_on_n[2][90] = 4.31e-8;
- I_g_on_n[2][100] = 4.31e-8;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.4545;
- width_dram_access_transistor = 0.14;
- curr_I_on_dram_cell = 45e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.168;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.4545;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.3;
- c_g_ideal[3] = 1.47e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 321.6e-6;
- I_on_p[3] = 203.3e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.42e-11;
- I_off_n[3][10] = 2.25e-11;
- I_off_n[3][20] = 3.46e-11;
- I_off_n[3][30] = 5.18e-11;
- I_off_n[3][40] = 7.58e-11;
- I_off_n[3][50] = 1.08e-10;
- I_off_n[3][60] = 1.51e-10;
- I_off_n[3][70] = 2.02e-10;
- I_off_n[3][80] = 2.57e-10;
- I_off_n[3][90] = 3.14e-10;
- I_off_n[3][100] = 3.85e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.6;
- Lphy[3] = 0.09;
- Lelec[3] = 0.0576;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.09*0.09;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.7;
- t_ox[3] = 5.5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 5.65e-15;
- mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.32;
- c_g_ideal[3] = 5.08e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1094.3e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.62;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 5.80e-15;
- I_off_n[3][10] = 1.21e-14;
- I_off_n[3][20] = 2.42e-14;
- I_off_n[3][30] = 4.65e-14;
- I_off_n[3][40] = 8.60e-14;
- I_off_n[3][50] = 1.54e-13;
- I_off_n[3][60] = 2.66e-13;
- I_off_n[3][70] = 4.45e-13;
- I_off_n[3][80] = 7.17e-13;
- I_off_n[3][90] = 1.11e-12;
- I_off_n[3][100] = 1.67e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360
- curr_asp_ratio_cell_cam = 2.92;//2.5
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 1;
- curr_core_tx_density = 1.25*0.7*0.7;
- curr_sckt_co_eff = 1.1539;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
-
-
- }
-
- if (tech == 65)
- { //65nm technology-node. Corresponds to year 2007 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .2e-9; // s
- SENSE_AMP_P = 5.7e-15; // J
- vdd[0] = 1.1;
- vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];
- alpha_power_law[0]=1.27;
- Lphy[0] = 0.025;
- Lelec[0] = 0.019;
- t_ox[0] = 1.1e-3;
- v_th[0] = .19491;
- c_ox[0] = 1.88e-14;
- mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 7.71e-2;
- c_g_ideal[0] = 4.69e-16;
- c_fringe[0] = 0.077e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 1197.2e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);
- I_on_p[0] = 870.8e-6;
- nmos_effective_resistance_multiplier = 1.50;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.74;
- //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first
- //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74.
- I_off_n[0][0] = 1.96e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][10] = 2.29e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][20] = 2.66e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][30] = 3.05e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][40] = 3.49e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][50] = 3.95e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][60] = 4.45e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][70] = 4.97e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][80] = 5.48e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][90] = 5.94e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][100] = 6.3e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_g_on_n[0][0] = 4.09e-8;//A/micron
- I_g_on_n[0][10] = 4.09e-8;
- I_g_on_n[0][20] = 4.09e-8;
- I_g_on_n[0][30] = 4.09e-8;
- I_g_on_n[0][40] = 4.09e-8;
- I_g_on_n[0][50] = 4.09e-8;
- I_g_on_n[0][60] = 4.09e-8;
- I_g_on_n[0][70] = 4.09e-8;
- I_g_on_n[0][80] = 4.09e-8;
- I_g_on_n[0][90] = 4.09e-8;
- I_g_on_n[0][100] = 4.09e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.2;
- vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];//TODO
- alpha_power_law[1]=1.40;
- Lphy[1] = 0.045;
- Lelec[1] = 0.0298;
- t_ox[1] = 1.9e-3;
- v_th[1] = 0.52354;
- c_ox[1] = 1.36e-14;
- mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 0.128;
- c_g_ideal[1] = 6.14e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 519.2e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);
- I_on_p[1] = 266e-6;
- nmos_effective_resistance_multiplier = 1.96;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.82;
- I_off_n[1][0] = 9.12e-12*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][10] = 1.49e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][20] = 2.36e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][30] = 3.64e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][40] = 5.48e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][50] = 8.05e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][60] = 1.15e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][70] = 1.59e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][80] = 2.1e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][90] = 2.62e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][100] = 3.21e-10*pow(vdd_real[1]/(vdd[1]),4);
-
- I_g_on_n[1][0] = 1.09e-10;//A/micron
- I_g_on_n[1][10] = 1.09e-10;
- I_g_on_n[1][20] = 1.09e-10;
- I_g_on_n[1][30] = 1.09e-10;
- I_g_on_n[1][40] = 1.09e-10;
- I_g_on_n[1][50] = 1.09e-10;
- I_g_on_n[1][60] = 1.09e-10;
- I_g_on_n[1][70] = 1.09e-10;
- I_g_on_n[1][80] = 1.09e-10;
- I_g_on_n[1][90] = 1.09e-10;
- I_g_on_n[1][100] = 1.09e-10;
-
- //ITRS LOP device type
- vdd[2] = 0.8;
- alpha_power_law[2]=1.43;
- vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];
- Lphy[2] = 0.032;
- Lelec[2] = 0.0216;
- t_ox[2] = 1.2e-3;
- v_th[2] = 0.28512;
- c_ox[2] = 1.87e-14;
- mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 0.292;
- c_g_ideal[2] = 6e-16;
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 573.1e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);
- I_on_p[2] = 340.6e-6;
- nmos_effective_resistance_multiplier = 1.82;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/2.05;
- I_off_n[2][0] = 4.9e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][10] = 6.49e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][20] = 8.45e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][30] = 1.08e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][40] = 1.37e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][50] = 1.71e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][60] = 2.09e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][70] = 2.48e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][80] = 2.84e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][90] = 3.13e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][100] = 3.42e-8*pow(vdd_real[2]/(vdd[2]),5);
-
- I_g_on_n[2][0] = 9.61e-9;//A/micron
- I_g_on_n[2][10] = 9.61e-9;
- I_g_on_n[2][20] = 9.61e-9;
- I_g_on_n[2][30] = 9.61e-9;
- I_g_on_n[2][40] = 9.61e-9;
- I_g_on_n[2][50] = 9.61e-9;
- I_g_on_n[2][60] = 9.61e-9;
- I_g_on_n[2][70] = 9.61e-9;
- I_g_on_n[2][80] = 9.61e-9;
- I_g_on_n[2][90] = 9.61e-9;
- I_g_on_n[2][100] = 9.61e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.2;
- Lphy[3] = 0.12;
- Lelec[3] = 0.0756;
- curr_v_th_dram_access_transistor = 0.43806;
- width_dram_access_transistor = 0.09;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 0.11;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.6;
- t_ox[3] = 2.2e-3;
- v_th[3] = 0.43806;
- c_ox[3] = 1.22e-14;
- mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.43806;
- c_g_ideal[3] = 1.46e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 399.8e-6;
- I_on_p[3] = 243.4e-6;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.23e-11;
- I_off_n[3][10] = 3.46e-11;
- I_off_n[3][20] = 5.24e-11;
- I_off_n[3][30] = 7.75e-11;
- I_off_n[3][40] = 1.12e-10;
- I_off_n[3][50] = 1.58e-10;
- I_off_n[3][60] = 2.18e-10;
- I_off_n[3][70] = 2.88e-10;
- I_off_n[3][80] = 3.63e-10;
- I_off_n[3][90] = 4.41e-10;
- I_off_n[3][100] = 5.36e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.3;
- Lphy[3] = 0.065;
- Lelec[3] = 0.0426;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.065;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.065*0.065;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 3.3;
- t_ox[3] = 5e-3;
- v_th[3] = 1.0;
- c_ox[3] = 6.16e-15;
- mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.385;
- c_g_ideal[3] = 4e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15 ;
- I_on_n[3] = 1031e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 2.39;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.80e-14;
- I_off_n[3][10] = 3.64e-14;
- I_off_n[3][20] = 7.03e-14;
- I_off_n[3][30] = 1.31e-13;
- I_off_n[3][40] = 2.35e-13;
- I_off_n[3][50] = 4.09e-13;
- I_off_n[3][60] = 6.89e-13;
- I_off_n[3][70] = 1.13e-12;
- I_off_n[3][80] = 1.78e-12;
- I_off_n[3][90] = 2.71e-12;
- I_off_n[3][100] = 3.99e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor
- curr_core_tx_density = 1.25*0.7;
- curr_sckt_co_eff = 1.1359;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if (tech == 45)
- { //45nm technology-node. Corresponds to year 2010 in ITRS
- //ITRS HP device type
- SENSE_AMP_D = .04e-9; // s
- SENSE_AMP_P = 2.7e-15; // J
- vdd[0] = 1.0;
- vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];//TODO
- alpha_power_law[0]=1.21;
- Lphy[0] = 0.018;
- Lelec[0] = 0.01345;
- t_ox[0] = 0.65e-3;
- v_th[0] = .18035;
- c_ox[0] = 3.77e-14;
- mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 9.38E-2;
- c_g_ideal[0] = 6.78e-16;
- c_fringe[0] = 0.05e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2046.6e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);
- //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of
- //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm
- I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI
- nmos_effective_resistance_multiplier = 1.51;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
- long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
- I_off_n[0][0] = 2.8e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][10] = 3.28e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][20] = 3.81e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][30] = 4.39e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][40] = 5.02e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][50] = 5.69e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][60] = 6.42e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][70] = 7.2e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][80] = 8.03e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][90] = 8.91e-7*pow(vdd_real[0]/(vdd[0]),4);
- I_off_n[0][100] = 9.84e-7*pow(vdd_real[0]/(vdd[0]),4);
-
- I_g_on_n[0][0] = 3.59e-8;//A/micron
- I_g_on_n[0][10] = 3.59e-8;
- I_g_on_n[0][20] = 3.59e-8;
- I_g_on_n[0][30] = 3.59e-8;
- I_g_on_n[0][40] = 3.59e-8;
- I_g_on_n[0][50] = 3.59e-8;
- I_g_on_n[0][60] = 3.59e-8;
- I_g_on_n[0][70] = 3.59e-8;
- I_g_on_n[0][80] = 3.59e-8;
- I_g_on_n[0][90] = 3.59e-8;
- I_g_on_n[0][100] = 3.59e-8;
-
- //ITRS LSTP device type
- vdd[1] = 1.1;
- vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];
- alpha_power_law[1]=1.33;
- Lphy[1] = 0.028;
- Lelec[1] = 0.0212;
- t_ox[1] = 1.4e-3;
- v_th[1] = 0.50245;
- c_ox[1] = 2.01e-14;
- mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 9.12e-2;
- c_g_ideal[1] = 5.18e-16;
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 666.2e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/2.08;
- I_off_n[1][0] = 1.01e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][10] = 1.65e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][20] = 2.62e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][30] = 4.06e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][40] = 6.12e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][50] = 9.02e-11*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][60] = 1.3e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][70] = 1.83e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][80] = 2.51e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][90] = 3.29e-10*pow(vdd_real[1]/(vdd[1]),4);
- I_off_n[1][100] = 4.1e-10*pow(vdd_real[1]/(vdd[1]),4);
-
- I_g_on_n[1][0] = 9.47e-12;//A/micron
- I_g_on_n[1][10] = 9.47e-12;
- I_g_on_n[1][20] = 9.47e-12;
- I_g_on_n[1][30] = 9.47e-12;
- I_g_on_n[1][40] = 9.47e-12;
- I_g_on_n[1][50] = 9.47e-12;
- I_g_on_n[1][60] = 9.47e-12;
- I_g_on_n[1][70] = 9.47e-12;
- I_g_on_n[1][80] = 9.47e-12;
- I_g_on_n[1][90] = 9.47e-12;
- I_g_on_n[1][100] = 9.47e-12;
-
- //ITRS LOP device type
- vdd[2] = 0.7;
- vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO
- alpha_power_law[2]=1.39;
- Lphy[2] = 0.022;
- Lelec[2] = 0.016;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.22599;
- c_ox[2] = 2.82e-14;//F/micron2
- mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 5.71e-2;
- c_g_ideal[2] = 6.2e-16;
- c_fringe[2] = 0.073e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 748.9e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.76;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.92;
- I_off_n[2][0] = 4.03e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][10] = 5.02e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][20] = 6.18e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][30] = 7.51e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][40] = 9.04e-9*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][50] = 1.08e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][60] = 1.27e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][70] = 1.47e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][80] = 1.66e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][90] = 1.84e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][100] = 2.03e-8*pow(vdd_real[2]/(vdd[2]),5);
-
- I_g_on_n[2][0] = 3.24e-8;//A/micron
- I_g_on_n[2][10] = 4.01e-8;
- I_g_on_n[2][20] = 4.90e-8;
- I_g_on_n[2][30] = 5.92e-8;
- I_g_on_n[2][40] = 7.08e-8;
- I_g_on_n[2][50] = 8.38e-8;
- I_g_on_n[2][60] = 9.82e-8;
- I_g_on_n[2][70] = 1.14e-7;
- I_g_on_n[2][80] = 1.29e-7;
- I_g_on_n[2][90] = 1.43e-7;
- I_g_on_n[2][100] = 1.54e-7;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.078;
- Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44559;
- width_dram_access_transistor = 0.079;
- curr_I_on_dram_cell = 36e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2.1e-3;
- v_th[3] = 0.44559;
- c_ox[3] = 1.41e-14;
- mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.181;
- c_g_ideal[3] = 1.10e-15;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 456e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 2.54e-11;
- I_off_n[3][10] = 3.94e-11;
- I_off_n[3][20] = 5.95e-11;
- I_off_n[3][30] = 8.79e-11;
- I_off_n[3][40] = 1.27e-10;
- I_off_n[3][50] = 1.79e-10;
- I_off_n[3][60] = 2.47e-10;
- I_off_n[3][70] = 3.31e-10;
- I_off_n[3][80] = 4.26e-10;
- I_off_n[3][90] = 5.27e-10;
- I_off_n[3][100] = 6.46e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.1;
- Lphy[3] = 0.045;
- Lelec[3] = 0.0298;
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.045;
- curr_I_on_dram_cell = 20e-6;//A
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.045*0.045;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.7;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.98e-15;
- mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.147;
- c_g_ideal[3] = 3.59e-16;
- c_fringe[3] = 0.08e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 999.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.31e-14;
- I_off_n[3][10] = 2.68e-14;
- I_off_n[3][20] = 5.25e-14;
- I_off_n[3][30] = 9.88e-14;
- I_off_n[3][40] = 1.79e-13;
- I_off_n[3][50] = 3.15e-13;
- I_off_n[3][60] = 5.36e-13;
- I_off_n[3][70] = 8.86e-13;
- I_off_n[3][80] = 1.42e-12;
- I_off_n[3][90] = 2.20e-12;
- I_off_n[3][100] = 3.29e-12;
- }
-
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7;
- curr_core_tx_density = 1.25;
- curr_sckt_co_eff = 1.1387;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if (tech == 32)
- {
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
- //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for
- //HP and LSTP.
- vdd[0] = 0.9;
- vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];
- alpha_power_law[0]=1.19;
- Lphy[0] = 0.013;
- Lelec[0] = 0.01013;
- t_ox[0] = 0.5e-3;
- v_th[0] = 0.21835;
- c_ox[0] = 4.11e-14;
- mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[0] = 5.09E-2;
- c_g_ideal[0] = 5.34e-16;
- c_fringe[0] = 0.04e-15;
- c_junc[0] = 1e-15;
- I_on_n[0] = 2211.7e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);
- I_on_p[0] = I_on_n[0] / 2;
- nmos_effective_resistance_multiplier = 1.49;
- n_to_p_eff_curr_drv_ratio[0] = 2.41;
- gmp_to_gmn_multiplier[0] = 1.38;
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.706;
- //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%),
- //whichever comes first
- I_off_n[0][0] = 1.52e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][10] = 1.55e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][20] = 1.59e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][30] = 1.68e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][40] = 1.90e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][50] = 2.69e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][60] = 5.32e-7*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][70] = 1.02e-6*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][80] = 1.62e-6*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][90] = 2.73e-6*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][100] = 6.1e-6*pow(vdd_real[0]/(vdd[0]),2);
-
- I_g_on_n[0][0] = 6.55e-8;//A/micron
- I_g_on_n[0][10] = 6.55e-8;
- I_g_on_n[0][20] = 6.55e-8;
- I_g_on_n[0][30] = 6.55e-8;
- I_g_on_n[0][40] = 6.55e-8;
- I_g_on_n[0][50] = 6.55e-8;
- I_g_on_n[0][60] = 6.55e-8;
- I_g_on_n[0][70] = 6.55e-8;
- I_g_on_n[0][80] = 6.55e-8;
- I_g_on_n[0][90] = 6.55e-8;
- I_g_on_n[0][100] = 6.55e-8;
-
-// 32 DG
-// I_g_on_n[0][0] = 2.71e-9;//A/micron
-// I_g_on_n[0][10] = 2.71e-9;
-// I_g_on_n[0][20] = 2.71e-9;
-// I_g_on_n[0][30] = 2.71e-9;
-// I_g_on_n[0][40] = 2.71e-9;
-// I_g_on_n[0][50] = 2.71e-9;
-// I_g_on_n[0][60] = 2.71e-9;
-// I_g_on_n[0][70] = 2.71e-9;
-// I_g_on_n[0][80] = 2.71e-9;
-// I_g_on_n[0][90] = 2.71e-9;
-// I_g_on_n[0][100] = 2.71e-9;
-
- //LSTP device type
- vdd[1] = 1;
- vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];
- alpha_power_law[1]=1.27;
- Lphy[1] = 0.020;
- Lelec[1] = 0.0173;
- t_ox[1] = 1.2e-3;
- v_th[1] = 0.513;
- c_ox[1] = 2.29e-14;
- mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[1] = 8.64e-2;
- c_g_ideal[1] = 4.58e-16;
- c_fringe[1] = 0.053e-15;
- c_junc[1] = 1e-15;
- I_on_n[1] = 683.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2.23;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];
- long_channel_leakage_reduction[1] = 1/1.93;
- I_off_n[1][0] = 2.06e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][10] = 3.30e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][20] = 5.15e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][30] = 7.83e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][40] = 1.16e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][50] = 1.69e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][60] = 2.40e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][70] = 3.34e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][80] = 4.54e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][90] = 5.96e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][100] = 7.44e-10*pow(vdd_real[1]/(vdd[1]),1);
-
- I_g_on_n[1][0] = 3.73e-11;//A/micron
- I_g_on_n[1][10] = 3.73e-11;
- I_g_on_n[1][20] = 3.73e-11;
- I_g_on_n[1][30] = 3.73e-11;
- I_g_on_n[1][40] = 3.73e-11;
- I_g_on_n[1][50] = 3.73e-11;
- I_g_on_n[1][60] = 3.73e-11;
- I_g_on_n[1][70] = 3.73e-11;
- I_g_on_n[1][80] = 3.73e-11;
- I_g_on_n[1][90] = 3.73e-11;
- I_g_on_n[1][100] = 3.73e-11;
-
-
- //LOP device type
- vdd[2] = 0.6;
- vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO
- alpha_power_law[2]=1.26;
- Lphy[2] = 0.016;
- Lelec[2] = 0.01232;
- t_ox[2] = 0.9e-3;
- v_th[2] = 0.24227;
- c_ox[2] = 2.84e-14;
- mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[2] = 4.64e-2;
- c_g_ideal[2] = 4.54e-16;
- c_fringe[2] = 0.057e-15;
- c_junc[2] = 1e-15;
- I_on_n[2] = 827.8e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2.28;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];
- long_channel_leakage_reduction[2] = 1/1.89;
- I_off_n[2][0] = 5.94e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][10] = 7.23e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][20] = 8.7e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][30] = 1.04e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][40] = 1.22e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][50] = 1.43e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][60] = 1.65e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][70] = 1.90e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][80] = 2.15e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][90] = 2.39e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][100] = 2.63e-7*pow(vdd_real[2]/(vdd[2]),5);
-
- I_g_on_n[2][0] = 2.93e-9;//A/micron
- I_g_on_n[2][10] = 2.93e-9;
- I_g_on_n[2][20] = 2.93e-9;
- I_g_on_n[2][30] = 2.93e-9;
- I_g_on_n[2][40] = 2.93e-9;
- I_g_on_n[2][50] = 2.93e-9;
- I_g_on_n[2][60] = 2.93e-9;
- I_g_on_n[2][70] = 2.93e-9;
- I_g_on_n[2][80] = 2.93e-9;
- I_g_on_n[2][90] = 2.93e-9;
- I_g_on_n[2][100] = 2.93e-9;
-
- if (ram_cell_tech_type == lp_dram)
- {
- //LP-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.056;
- Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 0.44129;
- width_dram_access_transistor = 0.056;
- curr_I_on_dram_cell = 36e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0;
- curr_asp_ratio_cell_dram = 1.46;
- curr_c_dram_cell = 20e-15;
-
- //LP-DRAM wordline transistor parameters
- curr_vpp = 1.5;
- t_ox[3] = 2e-3;
- v_th[3] = 0.44467;
- c_ox[3] = 1.48e-14;
- mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.174;
- c_g_ideal[3] = 7.45e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1055.4e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.65;
- n_to_p_eff_curr_drv_ratio[3] = 2.05;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.57e-11;
- I_off_n[3][10] = 5.51e-11;
- I_off_n[3][20] = 8.27e-11;
- I_off_n[3][30] = 1.21e-10;
- I_off_n[3][40] = 1.74e-10;
- I_off_n[3][50] = 2.45e-10;
- I_off_n[3][60] = 3.38e-10;
- I_off_n[3][70] = 4.53e-10;
- I_off_n[3][80] = 5.87e-10;
- I_off_n[3][90] = 7.29e-10;
- I_off_n[3][100] = 8.87e-10;
- }
- else if (ram_cell_tech_type == comm_dram)
- {
- //COMM-DRAM cell access transistor technology parameters
- curr_vdd_dram_cell = 1.0;
- Lphy[3] = 0.032;
- Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors.
- curr_v_th_dram_access_transistor = 1;
- width_dram_access_transistor = 0.032;
- curr_I_on_dram_cell = 20e-6;
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.032*0.032;
- curr_asp_ratio_cell_dram = 1.5;
- curr_c_dram_cell = 30e-15;
-
- //COMM-DRAM wordline transistor parameters
- curr_vpp = 2.6;
- t_ox[3] = 4e-3;
- v_th[3] = 1.0;
- c_ox[3] = 7.99e-15;
- mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6);
- Vdsat[3] = 0.129;
- c_g_ideal[3] = 2.56e-16;
- c_fringe[3] = 0.053e-15;
- c_junc[3] = 1e-15;
- I_on_n[3] = 1024.5e-6;
- I_on_p[3] = I_on_n[3] / 2;
- nmos_effective_resistance_multiplier = 1.69;
- n_to_p_eff_curr_drv_ratio[3] = 1.95;
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 3.63e-14;
- I_off_n[3][10] = 7.18e-14;
- I_off_n[3][20] = 1.36e-13;
- I_off_n[3][30] = 2.49e-13;
- I_off_n[3][40] = 4.41e-13;
- I_off_n[3][50] = 7.55e-13;
- I_off_n[3][60] = 1.26e-12;
- I_off_n[3][70] = 2.03e-12;
- I_off_n[3][80] = 3.19e-12;
- I_off_n[3][90] = 4.87e-12;
- I_off_n[3][100] = 7.16e-12;
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7;
- curr_sckt_co_eff = 1.1111;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 22){
- SENSE_AMP_D = .03e-9; // s
- SENSE_AMP_P = 2.16e-15; // J
- //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm
- //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP.
- //22 nm HP
- vdd[0] = 0.8;
- vdd_real[0] = g_ip->specific_hp_vdd ? g_ip->hp_Vdd : vdd[0];//TODO
- alpha_power_law[0]=1.2;//1.3//1.15;
- Lphy[0] = 0.009;//Lphy is the physical gate-length.
- Lelec[0] = 0.00468;//Lelec is the electrical gate-length.
- t_ox[0] = 0.55e-3;//micron
- v_th[0] = 0.1395;//V
- c_ox[0] = 3.63e-14;//F/micron2
- mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 2.33e-2; //V/micron
- c_g_ideal[0] = 3.27e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron
- c_junc[0] = 0;//F/micron2
- I_on_n[0] = 2626.4e-6*pow((vdd_real[0]-v_th[0])/(vdd[0]-v_th[0]),alpha_power_law[0]);//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.45;
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd_real[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/3.274;
- I_off_n[0][0] = 1.52e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
- I_off_n[0][10] = 1.55e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][20] = 1.59e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][30] = 1.68e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][40] = 1.90e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][50] = 2.69e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][60] = 5.32e-7/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][70] = 1.02e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][80] = 1.62e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][90] = 2.73e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- I_off_n[0][100] = 6.1e-6/1.5*1.2*pow(vdd_real[0]/(vdd[0]),2);
- //for 22nm DG HP
- I_g_on_n[0][0] = 1.81e-9;//A/micron
- I_g_on_n[0][10] = 1.81e-9;
- I_g_on_n[0][20] = 1.81e-9;
- I_g_on_n[0][30] = 1.81e-9;
- I_g_on_n[0][40] = 1.81e-9;
- I_g_on_n[0][50] = 1.81e-9;
- I_g_on_n[0][60] = 1.81e-9;
- I_g_on_n[0][70] = 1.81e-9;
- I_g_on_n[0][80] = 1.81e-9;
- I_g_on_n[0][90] = 1.81e-9;
- I_g_on_n[0][100] = 1.81e-9;
-
- //22 nm LSTP DG
- vdd[1] = 0.8;
- vdd_real[1] = g_ip->specific_lstp_vdd ? g_ip->lstp_Vdd : vdd[1];//TODO
- alpha_power_law[1]=1.23;
- Lphy[1] = 0.014;
- Lelec[1] = 0.008;//Lelec is the electrical gate-length.
- t_ox[1] = 1.1e-3;//micron
- v_th[1] = 0.40126;//V
- c_ox[1] = 2.30e-14;//F/micron2
- mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[1] = 6.64e-2; //V/micron
- c_g_ideal[1] = 3.22e-16;//F/micron
- c_fringe[1] = 0.08e-15;
- c_junc[1] = 0;//F/micron2
- I_on_n[1] = 727.6e-6*pow((vdd_real[1]-v_th[1])/(vdd[1]-v_th[1]),alpha_power_law[1]);//A/micron
- I_on_p[1] = I_on_n[1] / 2;
- nmos_effective_resistance_multiplier = 1.99;
- n_to_p_eff_curr_drv_ratio[1] = 2;
- gmp_to_gmn_multiplier[1] = 0.99;
- Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd_real[1] / I_on_n[1];//ohm-micron
- Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
- long_channel_leakage_reduction[1] = 1/1.89;
- I_off_n[1][0] = 2.43e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][10] = 4.85e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][20] = 9.68e-11*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][30] = 1.94e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][40] = 3.87e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][50] = 7.73e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][60] = 3.55e-10*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][70] = 3.09e-9*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][80] = 6.19e-9*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][90] = 1.24e-8*pow(vdd_real[1]/(vdd[1]),1);
- I_off_n[1][100]= 2.48e-8*pow(vdd_real[1]/(vdd[1]),1);
-
- I_g_on_n[1][0] = 4.51e-10;//A/micron
- I_g_on_n[1][10] = 4.51e-10;
- I_g_on_n[1][20] = 4.51e-10;
- I_g_on_n[1][30] = 4.51e-10;
- I_g_on_n[1][40] = 4.51e-10;
- I_g_on_n[1][50] = 4.51e-10;
- I_g_on_n[1][60] = 4.51e-10;
- I_g_on_n[1][70] = 4.51e-10;
- I_g_on_n[1][80] = 4.51e-10;
- I_g_on_n[1][90] = 4.51e-10;
- I_g_on_n[1][100] = 4.51e-10;
-
- //22 nm LOP
- vdd[2] = 0.6;
- vdd_real[2] = g_ip->specific_lop_vdd ? g_ip->lop_Vdd : vdd[2];//TODO
- alpha_power_law[2]=1.21;
- Lphy[2] = 0.011;
- Lelec[2] = 0.00604;//Lelec is the electrical gate-length.
- t_ox[2] = 0.8e-3;//micron
- v_th[2] = 0.2315;//V
- c_ox[2] = 2.87e-14;//F/micron2
- mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[2] = 1.81e-2; //V/micron
- c_g_ideal[2] = 3.16e-16;//F/micron
- c_fringe[2] = 0.08e-15;
- c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab
- I_on_n[2] = 916.1e-6*pow((vdd_real[2]-v_th[2])/(vdd[2]-v_th[2]),alpha_power_law[2]);//A/micron
- I_on_p[2] = I_on_n[2] / 2;
- nmos_effective_resistance_multiplier = 1.73;
- n_to_p_eff_curr_drv_ratio[2] = 2;
- gmp_to_gmn_multiplier[2] = 1.11;
- Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd_real[2] / I_on_n[2];//ohm-micron
- Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron
- long_channel_leakage_reduction[2] = 1/2.38;
-
- I_off_n[2][0] = 1.31e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][10] = 2.60e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][20] = 5.14e-8*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][30] = 1.02e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][40] = 2.02e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][50] = 3.99e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][60] = 7.91e-7*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][70] = 1.09e-6*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][80] = 2.09e-6*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][90] = 4.04e-6*pow(vdd_real[2]/(vdd[2]),5);
- I_off_n[2][100]= 4.48e-6*pow(vdd_real[2]/(vdd[2]),5);
-
- I_g_on_n[2][0] = 2.74e-9;//A/micron
- I_g_on_n[2][10] = 2.74e-9;
- I_g_on_n[2][20] = 2.74e-9;
- I_g_on_n[2][30] = 2.74e-9;
- I_g_on_n[2][40] = 2.74e-9;
- I_g_on_n[2][50] = 2.74e-9;
- I_g_on_n[2][60] = 2.74e-9;
- I_g_on_n[2][70] = 2.74e-9;
- I_g_on_n[2][80] = 2.74e-9;
- I_g_on_n[2][90] = 2.74e-9;
- I_g_on_n[2][100] = 2.74e-9;
-
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
- //parameters
- curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
- //2005 ITRS, the value was about twice the value in 2007 ITRS
- Lphy[3] = 0.022;//micron
- Lelec[3] = 0.0181;//micron.
- curr_v_th_dram_access_transistor = 1;//V
- width_dram_access_transistor = 0.022;//micron
- curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
- //kept constant. In reality this could perhaps be lower
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
- curr_asp_ratio_cell_dram = 0.667;
- curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
- //kept constant.
-
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
- curr_vpp = 2.3;//vpp. V
- t_ox[3] = 3.5e-3;//micron
- v_th[3] = 1.0;//V
- c_ox[3] = 9.06e-15;//F/micron2
- mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
- Vdsat[3] = 0.0972; //V/micron
- c_g_ideal[3] = 1.99e-16;//F/micron
- c_fringe[3] = 0.053e-15;//F/micron
- c_junc[3] = 1e-15;//F/micron2
- I_on_n[3] = 910.5e-6;//A/micron
- I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
- //
- n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.1e-13; //A/micron
- I_off_n[3][10] = 2.11e-13;
- I_off_n[3][20] = 3.88e-13;
- I_off_n[3][30] = 6.9e-13;
- I_off_n[3][40] = 1.19e-12;
- I_off_n[3][50] = 1.98e-12;
- I_off_n[3][60] = 3.22e-12;
- I_off_n[3][70] = 5.09e-12;
- I_off_n[3][80] = 7.85e-12;
- I_off_n[3][90] = 1.18e-11;
- I_off_n[3][100] = 1.72e-11;
-
- }
- else
- {
- //some error handler
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- if(tech == 16){
- //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm
- //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP.
- //16 nm HP
- vdd[0] = 0.7;
- Lphy[0] = 0.006;//Lphy is the physical gate-length.
- Lelec[0] = 0.00315;//Lelec is the electrical gate-length.
- t_ox[0] = 0.5e-3;//micron
- v_th[0] = 0.1489;//V
- c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR
- mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
- Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet
- c_g_ideal[0] = 2.30e-16;//F/micron
- c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3
- c_junc[0] = 0;//F/micron2 MASTAR result dynamic
- I_on_n[0] = 2768.4e-6;//A/micron
- I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current.
- n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in
- //"Dynamic" tab of Device workspace.
- gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value.
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
- Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
- long_channel_leakage_reduction[0] = 1/2.655;
- I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07;
- I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07;
- I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07;
- I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07;
- I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07;
- I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07;
- I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07;
- I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07;
- I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07;
- I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07;
- I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07;
- //for 16nm DG HP
- I_g_on_n[0][0] = 1.07e-9;//A/micron
- I_g_on_n[0][10] = 1.07e-9;
- I_g_on_n[0][20] = 1.07e-9;
- I_g_on_n[0][30] = 1.07e-9;
- I_g_on_n[0][40] = 1.07e-9;
- I_g_on_n[0][50] = 1.07e-9;
- I_g_on_n[0][60] = 1.07e-9;
- I_g_on_n[0][70] = 1.07e-9;
- I_g_on_n[0][80] = 1.07e-9;
- I_g_on_n[0][90] = 1.07e-9;
- I_g_on_n[0][100] = 1.07e-9;
-
-// //16 nm LSTP DG
-// vdd[1] = 0.8;
-// Lphy[1] = 0.014;
-// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
-// t_ox[1] = 1.1e-3;//micron
-// v_th[1] = 0.40126;//V
-// c_ox[1] = 2.30e-14;//F/micron2
-// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
-// Vdsat[1] = 6.64e-2; //V/micron
-// c_g_ideal[1] = 3.22e-16;//F/micron
-// c_fringe[1] = 0.008e-15;
-// c_junc[1] = 0;//F/micron2
-// I_on_n[1] = 727.6e-6;//A/micron
-// I_on_p[1] = I_on_n[1] / 2;
-// nmos_effective_resistance_multiplier = 1.99;
-// n_to_p_eff_curr_drv_ratio[1] = 2;
-// gmp_to_gmn_multiplier[1] = 0.99;
-// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
-// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
-// I_off_n[1][0] = 2.43e-11;
-// I_off_n[1][10] = 4.85e-11;
-// I_off_n[1][20] = 9.68e-11;
-// I_off_n[1][30] = 1.94e-10;
-// I_off_n[1][40] = 3.87e-10;
-// I_off_n[1][50] = 7.73e-10;
-// I_off_n[1][60] = 3.55e-10;
-// I_off_n[1][70] = 3.09e-9;
-// I_off_n[1][80] = 6.19e-9;
-// I_off_n[1][90] = 1.24e-8;
-// I_off_n[1][100]= 2.48e-8;
-//
-// // for 22nm LSTP HP
-// I_g_on_n[1][0] = 4.51e-10;//A/micron
-// I_g_on_n[1][10] = 4.51e-10;
-// I_g_on_n[1][20] = 4.51e-10;
-// I_g_on_n[1][30] = 4.51e-10;
-// I_g_on_n[1][40] = 4.51e-10;
-// I_g_on_n[1][50] = 4.51e-10;
-// I_g_on_n[1][60] = 4.51e-10;
-// I_g_on_n[1][70] = 4.51e-10;
-// I_g_on_n[1][80] = 4.51e-10;
-// I_g_on_n[1][90] = 4.51e-10;
-// I_g_on_n[1][100] = 4.51e-10;
-
-
- if (ram_cell_tech_type == 3)
- {}
- else if (ram_cell_tech_type == 4)
- {
- //22 nm commodity DRAM cell access transistor technology parameters.
- //parameters
- curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
- //2005 ITRS, the value was about twice the value in 2007 ITRS
- Lphy[3] = 0.022;//micron
- Lelec[3] = 0.0181;//micron.
- curr_v_th_dram_access_transistor = 1;//V
- width_dram_access_transistor = 0.022;//micron
- curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always
- //kept constant. In reality this could perhaps be lower
- curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A
- curr_Wmemcella_dram = width_dram_access_transistor;
- curr_Wmemcellpmos_dram = 0;
- curr_Wmemcellnmos_dram = 0;
- curr_area_cell_dram = 6*0.022*0.022;//micron2.
- curr_asp_ratio_cell_dram = 0.667;
- curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus
- //kept constant.
-
- //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR.
- curr_vpp = 2.3;//vpp. V
- t_ox[3] = 3.5e-3;//micron
- v_th[3] = 1.0;//V
- c_ox[3] = 9.06e-15;//F/micron2
- mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs
- Vdsat[3] = 0.0972; //V/micron
- c_g_ideal[3] = 1.99e-16;//F/micron
- c_fringe[3] = 0.053e-15;//F/micron
- c_junc[3] = 1e-15;//F/micron2
- I_on_n[3] = 910.5e-6;//A/micron
- I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used.
- nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm.
- //
- n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm
- gmp_to_gmn_multiplier[3] = 0.90;
- Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron
- Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron
- long_channel_leakage_reduction[3] = 1;
- I_off_n[3][0] = 1.1e-13; //A/micron
- I_off_n[3][10] = 2.11e-13;
- I_off_n[3][20] = 3.88e-13;
- I_off_n[3][30] = 6.9e-13;
- I_off_n[3][40] = 1.19e-12;
- I_off_n[3][50] = 1.98e-12;
- I_off_n[3][60] = 3.22e-12;
- I_off_n[3][70] = 5.09e-12;
- I_off_n[3][80] = 7.85e-12;
- I_off_n[3][90] = 1.18e-11;
- I_off_n[3][100] = 1.72e-11;
-
- }
- else
- {
- //some error handler
- }
-
- //SRAM cell properties
- curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um;
- curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_sram = 1.46;
- //CAM cell properties //TODO: data need to be revisited
- curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um;
- curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um;
- curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um;
- curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;
- curr_asp_ratio_cell_cam = 2.92;
- //Empirical undifferetiated core/FU coefficient
- curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7;
- curr_core_tx_density = 1.25/0.7/0.7/0.7;
- curr_sckt_co_eff = 1.1296;
- curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2
- curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
- }
-
- /*
- * TODO:WL_Vcc does not need to retain data as long as the wordline enable signal is not active (of course enable signal will not be active since it is idle)
- * So, the WL_Vcc only need to balance the leakage reduction and the required waking up restore time (as mentioned in the 4.0Ghz 291 Mb SRAM Intel Paper)
- */
- g_tp.peri_global.Vdd += curr_alpha * vdd_real[peri_global_tech_type];//real vdd, user defined or itrs
- g_tp.peri_global.Vdd_default += curr_alpha * vdd[peri_global_tech_type];//itrs vdd this does not have to do within line interpolation loop, can be assigned directly
- g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type];
- g_tp.peri_global.Vcc_min_default += g_tp.peri_global.Vdd_default * 0.45;// Use minimal voltage to keep the device conducted.//g_tp.peri_global.Vth;
- g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type];
- g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type];
- g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type];
- g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type];
- g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type];
- g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type];
- g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type];
- g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type];
- g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type];
- g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type];
- g_tp.peri_global.n_to_p_eff_curr_drv_ratio
- += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type];
- g_tp.peri_global.long_channel_leakage_reduction
- += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type];
- g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];//*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//Consider the voltage change may affect the current density as well. TODO: polynomial curve-fitting based on MASTAR may not be accurate enough
- g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300];//*pow(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default,3);//To mimic the Vdd effect on Ioff (for the same device, dvs should not change default Ioff---only changes if device is different?? but MASTAR shows different results)
- g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300];
- gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type];
- g_tp.peri_global.Mobility_n += curr_alpha *mobility_eff[peri_global_tech_type];
-
- //Sleep tx uses LSTP devices
- g_tp.sleep_tx.Vdd += curr_alpha * vdd_real[1];
- g_tp.sleep_tx.Vdd_default += curr_alpha * vdd[1];
- g_tp.sleep_tx.Vth += curr_alpha * v_th[1];
- g_tp.sleep_tx.Vcc_min_default += g_tp.sleep_tx.Vdd;
- g_tp.sleep_tx.Vcc_min = g_tp.sleep_tx.Vcc_min_default;//user cannot change this, has to be decided by technology
- g_tp.sleep_tx.t_ox += curr_alpha * t_ox[1];
- g_tp.sleep_tx.C_ox += curr_alpha * c_ox[1];
- g_tp.sleep_tx.C_g_ideal += curr_alpha * c_g_ideal[1];
- g_tp.sleep_tx.C_fringe += curr_alpha * c_fringe[1];
- g_tp.sleep_tx.C_junc += curr_alpha * c_junc[1];
- g_tp.sleep_tx.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.sleep_tx.l_phy += curr_alpha * Lphy[1];
- g_tp.sleep_tx.l_elec += curr_alpha * Lelec[1];
- g_tp.sleep_tx.I_on_n += curr_alpha * I_on_n[1];
- g_tp.sleep_tx.R_nch_on += curr_alpha * Rnchannelon[1];
- g_tp.sleep_tx.R_pch_on += curr_alpha * Rpchannelon[1];
- g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio
- += curr_alpha * n_to_p_eff_curr_drv_ratio[1];
- g_tp.sleep_tx.long_channel_leakage_reduction
- += curr_alpha * long_channel_leakage_reduction[1];
- g_tp.sleep_tx.I_off_n += curr_alpha * I_off_n[1][g_ip->temp - 300];//**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4);
- g_tp.sleep_tx.I_off_p += curr_alpha * I_off_n[1][g_ip->temp - 300];//**pow(g_tp.sleep_tx.Vdd/g_tp.sleep_tx.Vdd_default,4);
- g_tp.sleep_tx.I_g_on_n += curr_alpha * I_g_on_n[1][g_ip->temp - 300];
- g_tp.sleep_tx.I_g_on_p += curr_alpha * I_g_on_n[1][g_ip->temp - 300];
- g_tp.sleep_tx.Mobility_n += curr_alpha *mobility_eff[1];
- // gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[1];
-
- g_tp.sram_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type];
- g_tp.sram_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.sram_cell.Vcc_min_default += g_tp.sram_cell.Vdd_default * 0.6;
- g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4);
- g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.sram_cell.Vdd/g_tp.sram_cell.Vdd_default,4);
- g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell;
- g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor;
- g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell;
- g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp;
- g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell;
- g_tp.vpp += curr_alpha * curr_vpp;
- g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor];
- g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor];
- g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor];
- g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor];
- g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor];
- g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor];
- g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor];
- g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor];
- g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
- g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300];
-
- g_tp.cam_cell.Vdd += curr_alpha * vdd_real[ram_cell_tech_type];
- g_tp.cam_cell.Vdd_default += curr_alpha * vdd[ram_cell_tech_type];
- g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type];
- g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type];
- g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type];
- g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type];
- g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type];
- g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type];
- g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type];
- g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron
- g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type];
- g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type];
- g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type];
- g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type];
- g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type];
- g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//*pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4);
- g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300];//**pow(g_tp.cam_cell.Vdd/g_tp.cam_cell.Vdd_default,4);
- g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
- g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300];
-
- g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram;
- g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram;
- g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram;
- area_cell_dram += curr_alpha * curr_area_cell_dram;
- asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram;
-
- g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram;
- g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram;
- g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram;
- area_cell_sram += curr_alpha * curr_area_cell_sram;
- asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram;
-
- g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng
- g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam;
- g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam;
- area_cell_cam += curr_alpha * curr_area_cell_cam;
- asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam;
-
- //Sense amplifier latch Gm calculation
- mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type];
- Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type];
-
- //Empirical undifferetiated core/FU coefficient
- g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff;
- g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density;
- g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead;
- g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead;
- g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff;
- }
-
-
- //Currently we are not modeling the resistance/capacitance of poly anywhere.
- //following data are continuous function (or data have been processed) does not need linear interpolation
- g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
- g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
- g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process
- g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process
- g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
- g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
- g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
- g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
-
- g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um;
- g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um;
- g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um;
- g_tp.cell_h_def = 50 * g_ip->F_sz_um;
- g_tp.w_poly_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_contact = g_ip->F_sz_um;
- g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um;
- g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um;
-
- g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
- g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
- g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
- g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
- g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
- g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
- g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
- g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_;
-
- if (ram_cell_tech_type == comm_dram)
- {
- g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
- g_tp.h_dec = 8; // in the unit of memory cell height
- }
- else
- {
- g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
- g_tp.h_dec = 4; // in the unit of memory cell height
- }
-
- g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal;
- g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal;
- g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal;
-
- g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal;
- g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n;
- //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p;
-
- g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal;
-
- double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global;
- double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch;
- g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch * pow((g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/(g_tp.peri_global.Vdd_default-g_tp.peri_global.Vth),1.3)/(g_tp.peri_global.Vdd/g_tp.peri_global.Vdd_default);
-
- g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram));
- g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w;
- g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram));
- g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w;
- g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng
- g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w;
-
- g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd;
- g_tp.sram.Vbitpre = g_tp.sram_cell.Vdd;//vdd[ram_cell_tech_type];
- g_tp.cam.Vbitpre = g_tp.cam_cell.Vdd;//vdd[ram_cell_tech_type];//Sheng
- pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
- g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
- g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_;
-
- //DVS and power-gating voltage finalization
- if ((g_tp.sram_cell.Vcc_min_default > g_tp.sram_cell.Vdd
- || g_tp.peri_global.Vdd < g_tp.peri_global.Vdd_default*0.75
- || g_tp.sram_cell.Vdd < g_tp.sram_cell.Vdd_default*0.75) && (!g_ip->is_main_mem))
- {
- cerr << "User defined Vdd is too low.\n\n"<< endl;
- exit(0);
- }
-
- if (g_ip->specific_vcc_min)
- {
- g_tp.sram_cell.Vcc_min = g_ip->user_defined_vcc_min;
- g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min;
- g_tp.sram.Vbitfloating = g_tp.sram.Vbitpre*0.7*(g_tp.sram_cell.Vcc_min/g_tp.peri_global.Vcc_min_default);
-// if (g_ip->user_defined_vcc_min < g_tp.peri_global.Vcc_min_default)
-// {
-// g_tp.peri_global.Vcc_min = g_ip->user_defined_vcc_min;
-// }
-// else {
-//
-// }
- }
- else
- {
- g_tp.sram_cell.Vcc_min = g_tp.sram_cell.Vcc_min_default;
- g_tp.peri_global.Vcc_min = g_tp.peri_global.Vcc_min_default;
- g_tp.sram.Vbitfloating = g_tp.sram.Vbitpre*0.7;
- }
-
- if (g_tp.sram_cell.Vcc_min < g_tp.sram_cell.Vcc_min_default )//if want to compute multiple power-gating vdd settings in one run, should have multiple results copies (each copy containing such flag) in update_pg ()
- {
- g_ip->user_defined_vcc_underflow = true;
- }
- else
- {
- g_ip->user_defined_vcc_underflow = false;
- }
-
- if ((g_tp.sram_cell.Vcc_min > g_tp.sram_cell.Vdd
- || g_tp.peri_global.Vcc_min > g_tp.peri_global.Vdd)&& (!g_ip->is_main_mem))
- {
- cerr << "User defined power-saving supply voltage cannot be lower than Vdd (DVS0).\n\n"<< endl;
- exit(0);
- }
- double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
- ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
-
- for (iter=0; iter<=1; ++iter)
- {
- // linear interpolation
- if (iter == 0)
- {
- tech = tech_lo;
- if (tech_lo == tech_hi)
- {
- curr_alpha = 1;
- }
- else
- {
- curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi);
- }
- }
- else
- {
- tech = tech_hi;
- if (tech_lo == tech_hi)
- {
- break;
- }
- else
- {
- curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi);
- }
- }
-
- if (tech == 180)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.0;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.017;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.75;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.75;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.2;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 1.5;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0]= 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.017;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.75;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.75;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.98;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.18;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
- wire_r_per_micron[1][3] = 12 / 0.18;
- }
- else if (tech == 90)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
- aspect_ratio[0][0] = 2.4;
- wire_width = wire_pitch[0][0] / 2; //micron
- wire_thickness = aspect_ratio[0][0] * wire_width;//micron
- wire_spacing = wire_pitch[0][0] - wire_width;//micron
- barrier_thickness = 0.01;//micron
- dishing_thickness = 0;//micron
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron
- ild_thickness[0][0] = 0.48;//micron
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.709;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15; //F/micron
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0],
- vert_dielectric_constant[0][0],
- fringe_cap);//F/micron.
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.4;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.48;//micron
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.709;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.7;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.96;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.709;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.008;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.48;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 3.038;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0],
- vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.48;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 3.038;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1],
- vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 1.1;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 3.038;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.09;
- wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
- wire_r_per_micron[1][3] = 12 / 0.09;
- }
- else if (tech == 65)
- {
- //Aggressive projections
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 2.7;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.405;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 2.303;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 2.7;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.405;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 2.303;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1],
- vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 2.8;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.81;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 2.303;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.006;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.405;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.734;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.405;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.734;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.77;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.734;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.065;
- wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
- wire_r_per_micron[1][3] = 12 / 0.065;
- }
- else if (tech == 45)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.315;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.958;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] ,
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.315;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.958;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.63;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.958;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.004;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.315;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.46;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.315;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.46;
- vert_dielectric_constant[1][1] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.55;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.46;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.045;
- wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
- wire_r_per_micron[1][3] = 12 / 0.045;
- }
- else if (tech == 32)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.21;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.664;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.21;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.664;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.42;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.664;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.21;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.214;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- aspect_ratio[1][1] = 2.0;
- wire_width = wire_pitch[1][1] / 2;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.21;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.214;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.385;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.214;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.032;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
- }
- else if (tech == 22)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.15;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.414;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- wire_width = wire_pitch[0][1] / 2;
- aspect_ratio[0][1] = 3.0;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.15;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.414;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.3;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.414;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.003;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.15;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 2.104;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.15;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 2.104;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.275;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 2.104;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.022;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- }
-
- else if (tech == 16)
- {
- //Aggressive projections.
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
- aspect_ratio[0][0] = 3.0;
- wire_width = wire_pitch[0][0] / 2;
- wire_thickness = aspect_ratio[0][0] * wire_width;
- wire_spacing = wire_pitch[0][0] - wire_width;
- barrier_thickness = 0;
- dishing_thickness = 0;
- alpha_scatter = 1;
- wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][0] = 0.108;
- miller_value[0][0] = 1.5;
- horiz_dielectric_constant[0][0] = 1.202;
- vert_dielectric_constant[0][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0],
- fringe_cap);
-
- wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global
- aspect_ratio[0][1] = 3.0;
- wire_width = wire_pitch[0][1] / 2;
- wire_thickness = aspect_ratio[0][1] * wire_width;
- wire_spacing = wire_pitch[0][1] - wire_width;
- wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][1] = 0.108;
- miller_value[0][1] = 1.5;
- horiz_dielectric_constant[0][1] = 1.202;
- vert_dielectric_constant[0][1] = 3.9;
- wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1],
- fringe_cap);
-
- wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global
- aspect_ratio[0][2] = 3.0;
- wire_width = wire_pitch[0][2] / 2;
- wire_thickness = aspect_ratio[0][2] * wire_width;
- wire_spacing = wire_pitch[0][2] - wire_width;
- wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[0][2] = 0.216;
- miller_value[0][2] = 1.5;
- horiz_dielectric_constant[0][2] = 1.202;
- vert_dielectric_constant[0][2] = 3.9;
- wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
- fringe_cap);
-
-// //*************************
-// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][4] - wire_width;
-// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][5] - wire_width;
-// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
-// aspect_ratio = 3.0;
-// wire_width = wire_pitch[0][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[0][6] - wire_width;
-// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.3;
-// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- //*************************
-
- //Conservative projections
- wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
- aspect_ratio[1][0] = 2.0;
- wire_width = wire_pitch[1][0] / 2;
- wire_thickness = aspect_ratio[1][0] * wire_width;
- wire_spacing = wire_pitch[1][0] - wire_width;
- barrier_thickness = 0.002;
- dishing_thickness = 0;
- alpha_scatter = 1.05;
- wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][0] = 0.108;
- miller_value[1][0] = 1.5;
- horiz_dielectric_constant[1][0] = 1.998;
- vert_dielectric_constant[1][0] = 3.9;
- fringe_cap = 0.115e-15;
- wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0],
- fringe_cap);
-
- wire_pitch[1][1] = 4 * g_ip->F_sz_um;
- wire_width = wire_pitch[1][1] / 2;
- aspect_ratio[1][1] = 2.0;
- wire_thickness = aspect_ratio[1][1] * wire_width;
- wire_spacing = wire_pitch[1][1] - wire_width;
- wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][1] = 0.108;
- miller_value[1][1] = 1.5;
- horiz_dielectric_constant[1][1] = 1.998;
- vert_dielectric_constant[1][1] = 3.9;
- wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1],
- fringe_cap);
-
- wire_pitch[1][2] = 8 * g_ip->F_sz_um;
- aspect_ratio[1][2] = 2.2;
- wire_width = wire_pitch[1][2] / 2;
- wire_thickness = aspect_ratio[1][2] * wire_width;
- wire_spacing = wire_pitch[1][2] - wire_width;
- dishing_thickness = 0.1 * wire_thickness;
- wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width,
- wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
- ild_thickness[1][2] = 0.198;
- miller_value[1][2] = 1.5;
- horiz_dielectric_constant[1][2] = 1.998;
- vert_dielectric_constant[1][2] = 3.9;
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
- ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2],
- fringe_cap);
- //Nominal projections for commodity DRAM wordline/bitline
- wire_pitch[1][3] = 2 * 0.016;//micron
- wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
- wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
-
- //******************
-// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][4] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][4] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][5] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][5] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
-//
-// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
-// aspect_ratio = 2.2;
-// wire_width = wire_pitch[1][6] / 2;
-// wire_thickness = aspect_ratio * wire_width;
-// wire_spacing = wire_pitch[1][6] - wire_width;
-// dishing_thickness = 0.1 * wire_thickness;
-// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
-// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
-// ild_thickness = 0.275;
-// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
-// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
-// fringe_cap);
- }
- g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
- g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
-
- g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
- g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
-
- g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
- g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
-
- g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
-
- g_tp.sense_delay += curr_alpha *SENSE_AMP_D;
- g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P;
-// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
-// g_tp.vert_dielectric_constant += vert_dielectric_constant;
-// g_tp.aspect_ratio += aspect_ratio;
-// g_tp.miller_value += miller_value;
-// g_tp.ild_thickness += ild_thickness;
-
- }
- g_tp.fringe_cap = fringe_cap;
-
- double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- double p_to_n_sizing_r = pmos_to_nmos_sz_ratio();
- double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0);
- double tf = rd * c_load;
- g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE);
- double KLOAD = 1;
- c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0));
- tf = rd * c_load;
- g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE);
-}
-
diff --git a/cacti/uca.cc b/cacti/uca.cc
deleted file mode 100755
index df1671b..0000000
--- a/cacti/uca.cc
+++ /dev/null
@@ -1,441 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#include
-#include
-
-#include "uca.h"
-
-
-UCA::UCA(const DynamicParameter & dyn_p)
- :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
-{
- int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
- int num_banks_hor_dir = nbanks/num_banks_ver_dir;
-
- if (dp.use_inp_params)
- {
- RWP = dp.num_rw_ports;
- ERP = dp.num_rd_ports;
- EWP = dp.num_wr_ports;
- SCHP = dp.num_search_ports;
- }
- else
- {
- RWP = g_ip->num_rw_ports;
- ERP = g_ip->num_rd_ports;
- EWP = g_ip->num_wr_ports;
- SCHP = g_ip->num_search_ports;
- }
-
- num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
- num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
- num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
- num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
- num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
-
- if (!dp.fully_assoc && !dp.pure_cam)
- {
-
- if (g_ip->fast_access && dp.is_tag == false)
- {
- num_do_b_bank *= g_ip->data_assoc;
- }
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- else
- {
-
- htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
- htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
- htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
- num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
- }
-
- area.w = htree_in_data->area.w;
- area.h = htree_in_data->area.h;
-
- area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
-// cout<<"area cell"<delay + bank.htree_in_add->delay;
- double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
- delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_1_predec->delay +
- bank.mat.sa_mux_lev_1_dec->delay;
- delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
- bank.mat.sa_mux_lev_2_predec->delay +
- bank.mat.sa_mux_lev_2_dec->delay;
- double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
-
- delay_before_subarray_output_driver =
- MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
- delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
- MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
- delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
- delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
- bank.htree_out_data->delay + htree_out_data->delay;
- access_time = bank.mat.delay_comparator;
-
- double ram_delay_inside_mat;
- if (dp.fully_assoc)
- {
- //delay of FA contains both CAM tag and RAM data
- { //delay of CAM
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- access_time = htree_in_add->delay + bank.htree_in_add->delay;
- //delay of fully-associative data array
- access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
- }
- }
- else
- {
- access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
- }
-
- if (dp.is_main_mem)
- {
- double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
- double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
- delay_from_subarray_out_drv_to_out;
- access_time = t_rcd + cas_latency;
- }
-
- double temp;
-
- if (!dp.fully_assoc)
- {
- temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
- if (dp.is_dram)
- {
- temp += bank.mat.delay_writeback; // temp stores random cycle time
- }
-
-
- temp = MAX(temp, bank.mat.r_predec->delay);
- temp = MAX(temp, bank.mat.b_mux_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
- else
- {
- ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
- temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
- + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
-
- temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
- temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
- }
-
- // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
- if (g_ip->rpters_in_htree == false)
- {
- temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
- }
- cycle_time = temp;
-
- double delay_req_network = max_delay_before_row_decoder;
- double delay_rep_network = delay_from_subarray_out_drv_to_out;
- multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
-
- if (dp.is_main_mem)
- {
- multisubbank_interleave_cycle_time = htree_in_add->delay;
- precharge_delay = htree_in_add->delay +
- bank.htree_in_add->delay + bank.mat.delay_writeback +
- bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
- cycle_time = access_time + precharge_delay;
- }
- else
- {
- precharge_delay = 0;
- }
-
- double dram_array_availability = 0;
- if (dp.is_dram)
- {
- dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
- }
-
- return outrisetime;
-}
-
-
-
-// note: currently, power numbers are for a bank of an array
-void UCA::compute_power_energy()
-{
- bank.compute_power_energy();
- power = bank.power;
-
- power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
- power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
- if (dp.fully_assoc || dp.pure_cam)
- {
- power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
- }
- power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
- htree_in_data->power.readOp.leakage +
- htree_out_data->power.readOp.leakage;
-
- power_routing_to_bank.readOp.power_gated_leakage += htree_in_add->power.readOp.power_gated_leakage +
- htree_in_data->power.readOp.power_gated_leakage +
- htree_out_data->power.readOp.power_gated_leakage;
-
- power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
- htree_in_data->power.readOp.gate_leakage +
- htree_out_data->power.readOp.gate_leakage;
- if (dp.fully_assoc || dp.pure_cam)
- {
- power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
- power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
- power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
- power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
- power.readOp.leakage += power_routing_to_bank.readOp.leakage;
- power.readOp.power_gated_leakage += power_routing_to_bank.readOp.power_gated_leakage;
- power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
-
- // calculate total write energy per access
- power.writeOp.dynamic = power.readOp.dynamic
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- - power_routing_to_bank.readOp.dynamic
- + power_routing_to_bank.writeOp.dynamic
- + bank.htree_in_data->power.readOp.dynamic
- - bank.htree_out_data->power.readOp.dynamic;
-
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- }
-
- dyn_read_energy_from_closed_page = power.readOp.dynamic;
- dyn_read_energy_from_open_page = power.readOp.dynamic -
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic +
- bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- dyn_read_energy_remaining_words_in_burst =
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
- ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- power_routing_to_bank.readOp.dynamic);
- dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
- dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
-
- activate_energy = htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
- (bank.mat.r_predec->power.readOp.dynamic +
- bank.mat.power_row_decoders.readOp.dynamic +
- bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
- read_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
- bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
- bank.htree_out_data->power.readOp.dynamic +
- htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
- write_energy = (htree_in_add->power.readOp.dynamic +
- bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
- htree_in_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic +
- (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
- bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
- precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
- bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
-
- //The follow 6 parameters are only used in DRAM/eDRAM output for now
- leak_power_subbank_closed_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_closed_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
- //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page =
- (bank.mat.r_predec->power.readOp.leakage +
- bank.mat.b_mux_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
- bank.mat.power_row_decoders.readOp.leakage +
- bank.mat.power_bit_mux_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
- bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_subbank_open_page +=
- (bank.mat.r_predec->power.readOp.gate_leakage +
- bank.mat.b_mux_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
- bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
- bank.mat.power_row_decoders.readOp.gate_leakage +
- bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
- bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
- //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
-
- leak_power_request_and_reply_networks =
- power_routing_to_bank.readOp.leakage +
- bank.htree_in_add->power.readOp.leakage +
- bank.htree_in_data->power.readOp.leakage +
- bank.htree_out_data->power.readOp.leakage;
-
- leak_power_request_and_reply_networks +=
- power_routing_to_bank.readOp.gate_leakage +
- bank.htree_in_add->power.readOp.gate_leakage +
- bank.htree_in_data->power.readOp.gate_leakage +
- bank.htree_out_data->power.readOp.gate_leakage;
-
- if (dp.fully_assoc || dp.pure_cam)
- {
- leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
- leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
- }
-
-
- if (dp.is_dram)
- { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
- refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
- bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
- refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- refresh_power /= dp.dram_refresh_period;
- }
-// The follow 6 parameters are only used in DRAM/eDRAM output for now
-
- if (dp.is_tag == false)
- {
- power.readOp.dynamic = dyn_read_energy_from_closed_page;
- power.writeOp.dynamic = dyn_read_energy_from_closed_page
- - dyn_read_energy_remaining_words_in_burst
- - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
- + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- + (power_routing_to_bank.writeOp.dynamic -
- power_routing_to_bank.readOp.dynamic -
- bank.htree_out_data->power.readOp.dynamic +
- bank.htree_in_data->power.readOp.dynamic) *
- (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
-
- if (dp.is_dram == false)
- {
- power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
- }
- }
-
- // if DRAM, add refresh power to total leakage
- if (dp.is_dram)
- {
- power.readOp.leakage += refresh_power;
- }
-
- // TODO: below should be avoided.
- /*if (dp.is_main_mem)
- {
- power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
- }*/
-
- assert(power.readOp.dynamic > 0);
- assert(power.writeOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
-}
-
diff --git a/cacti/wire.cc b/cacti/wire.cc
deleted file mode 100644
index 05c6de9..0000000
--- a/cacti/wire.cc
+++ /dev/null
@@ -1,883 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "wire.h"
-#include "cmath"
-// use this constructor to calculate wire stats
-Wire::Wire(
- enum Wire_type wire_model,
- double wl,
- int n,
- double w_s,
- double s_s,
- enum Wire_placement wp,
- double resistivity,
- TechnologyParameter::DeviceType *dt
- ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s),
- resistivity(resistivity), deviceType(dt)
-{
- wire_placement = wp;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
- if (initialized != 1) {
- cout << "Wire not initialized. Initializing it with default values\n";
- Wire winit;
- }
- calculate_wire_stats();
- // change everything back to seconds, microns, and Joules
- repeater_spacing *= 1e6;
- wire_length *= 1e6;
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- assert(wire_length > 0);
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
-}
-
- // the following values are for peripheral global technology
- // specified in the input config file
- Component Wire::global;
- Component Wire::global_5;
- Component Wire::global_10;
- Component Wire::global_20;
- Component Wire::global_30;
- Component Wire::low_swing;
-
- int Wire::initialized;
- double Wire::wire_width_init;
- double Wire::wire_spacing_init;
- double Wire::repeater_size_init; // value used in initialization should not be reused in final output
- double Wire::repeater_spacing_init;
-
-
-Wire::Wire(double w_s, double s_s, /*bool reset_repeater_sizing,*/ enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt)
-{
- w_scale = w_s;
- s_scale = s_s;
- deviceType = dt;
- wire_placement = wp;
- resistivity = resis;
- min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
- in_rise_time = 0;
- out_rise_time = 0;
-
- switch (wire_placement)
- {
- case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break;
- case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break;
- default: wire_width = g_tp.wire_local.pitch; break;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
- initialized = 1;
- init_wire();
- //init_wire(reset_repeater_sizing);
- wire_width_init = wire_width;
- wire_spacing_init = wire_spacing;
-
- assert(power.readOp.dynamic > 0);
- assert(power.readOp.leakage > 0);
- assert(power.readOp.gate_leakage > 0);
-}
-
-
-
-Wire::~Wire()
-{
-}
-
-
-
-void
-Wire::calculate_wire_stats()
-{
-
- if (wire_placement == outside_mat) {
- wire_width = g_tp.wire_outside_mat.pitch;
- }
- else if (wire_placement == inside_mat) {
- wire_width = g_tp.wire_inside_mat.pitch;
- }
- else {
- wire_width = g_tp.wire_local.pitch;
- }
-
- wire_spacing = wire_width;
-
- wire_width *= (w_scale * 1e-6/2) /* (m) */;
- wire_spacing *= (s_scale * 1e-6/2) /* (m) */;
-
-
- if (wt != Low_swing) {
-
- // delay_optimal_wire();
-
- if (wt == Global) {
- delay = global.delay * wire_length;
- power.readOp.dynamic = global.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global.area.w;
- repeater_size = global.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_5) {
- delay = global_5.delay * wire_length;
- power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_5.area.w;
- repeater_size = global_5.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_10) {
- delay = global_10.delay * wire_length;
- power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_10.area.w;
- repeater_size = global_10.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_20) {
- delay = global_20.delay * wire_length;
- power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_20.area.w;
- repeater_size = global_20.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- else if (wt == Global_30) {
- delay = global_30.delay * wire_length;
- power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
- power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
- power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length;
- repeater_spacing = global_30.area.w;
- repeater_size = global_30.area.h;
- area.set_area((wire_length/repeater_spacing) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_size,
- g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
- }
- out_rise_time = delay*repeater_spacing/deviceType->Vth;
-
- }
- else if (wt == Low_swing) {
- low_swing_model ();
- repeater_spacing = wire_length;
- repeater_size = 1;
-
- }
- else {
- assert(0);
- }
-
-// if (g_ip->interconect_power_gated)//TODO:actual sleep txs need to be added as in the wordline drivers,
-// //but since wires have enough space underneath for placement and routing of the sleep tx, the area overhead should be very small.
-// //performance loss and energy overhead is also very small because of the property of sleep tx.
-// {
-// power.readOp.leakage = power.readOp.leakage/deviceType->Vdd*deviceType->Vcc_min;
-// }
- power.readOp.power_gated_leakage = power.readOp.leakage/deviceType->Vdd*deviceType->Vcc_min;//TODO:
-}
-
-
-
-/*
- * The fall time of an input signal to the first stage of a circuit is
- * assumed to be same as the fall time of the output signal of two
- * inverters connected in series (refer: CACTI 1 Technical report,
- * section 6.1.3)
- */
- double
-Wire::signal_fall_time ()
-{
-
- /* rise time of inverter 1's output */
- double rt;
- /* fall time of inverter 2's output */
- double ft;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- return ft;
-}
-
-
-
-double Wire::signal_rise_time ()
-{
-
- /* rise time of inverter 1's output */
- double ft;
- /* fall time of inverter 2's output */
- double rt;
- double timeconst;
-
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(g_tp.min_w_nmos_, NCH, 1);
- rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
- timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
- tr_R_on(min_w_pmos, PCH, 1);
- ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
- return ft; //sec
-}
-
-
-
-/* Wire resistance and capacitance calculations
- * wire width
- *
- * /__/
- * | |
- * | | height = ASPECT_RATIO*wire width (ASPECT_RATIO = 2.2, ref: ITRS)
- * |__|/
- *
- * spacing between wires in same level = wire width
- * spacing between wires in adjacent levels = wire width---this is incorrect,
- * according to R.Ho's paper and thesis. ILD != wire width
- *
- */
-
-double Wire::wire_cap (double len /* in m */, bool call_from_outside)
-{
- //TODO: this should be consistent with the wire_res in technology file
- double sidewall, adj, tot_cap;
- double wire_height;
- double epsilon0 = 8.8542e-12;
- double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness;
-
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_outside_mat.miller_value;
- ild_thickness = g_tp.wire_outside_mat.ild_thickness;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
- miller_value = g_tp.wire_inside_mat.miller_value;
- ild_thickness = g_tp.wire_inside_mat.ild_thickness;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
- vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
- miller_value = g_tp.wire_local.miller_value;
- ild_thickness = g_tp.wire_local.ild_thickness;
- break;
- }
- }
-
- if (call_from_outside)
- {
- wire_width *= 1e-6;
- wire_spacing *= 1e-6;
- }
- wire_height = wire_width/w_scale*aspect_ratio;
- /*
- * assuming height does not change. wire_width = width_original*w_scale
- * So wire_height does not change as wire width increases
- */
-
-// capacitance between wires in the same level
-// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
-// * epsilon0;
-
- sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
- * epsilon0;
-
-
- // capacitance between wires in adjacent levels
- //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
- //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
-
- adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
- //Change ild_thickness from micron to M
-
- //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
- tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
-
- if (call_from_outside)
- {
- wire_width *= 1e6;
- wire_spacing *= 1e6;
- }
- return (tot_cap*len); // (F)
-}
-
-
- double
-Wire::wire_res (double len /*(in m)*/)
-{
-
- double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0;
- //TODO: this should be consistent with the wire_res in technology file
- //The whole computation should be consistent with the wire_res in technology.cc too!
-
- switch (wire_placement)
- {
- case outside_mat:
- {
- aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
- break;
- }
- case inside_mat :
- {
- aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
- break;
- }
- default:
- {
- aspect_ratio = g_tp.wire_local.aspect_ratio;
- break;
- }
- }
- return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)*
- (wire_width-2*barrier_thickness)));
-}
-
-/*
- * Calculates the delay, power and area of the transmitter circuit.
- *
- * The transmitter delay is the sum of nand gate delay, inverter delay
- * low swing nmos delay, and the wire delay
- * (ref: Technical report 6)
- */
- void
-Wire::low_swing_model()
-{
- double len = wire_length;
- double beta = pmos_to_nmos_sz_ratio();
-
-
- double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time;
-
- /* Final nmos low swing driver size calculation:
- * Try to size the driver such that the delay
- * is less than 8FO4.
- * If the driver size is greater than
- * the max allowable size, assume max size for the driver.
- * In either case, recalculate the delay using
- * the final driver size assuming slow input with
- * finite rise time instead of ideal step input
- *
- * (ref: Technical report 6)
- */
- double cwire = wire_cap(len); /* load capacitance */
- double rwire = wire_res(len);
-
-#define RES_ADJ (8.6) // Increase in resistance due to low driving vol.
-
- double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ;
- double nsize = R_to_w(driver_res, NCH);
-
- nsize = MIN(nsize, g_tp.max_w_nmos_);
- nsize = MAX(nsize, g_tp.min_w_nmos_);
-
- if(rwire*cwire > 8*g_tp.FO4)
- {
- nsize = g_tp.max_w_nmos_;
- }
-
- // size the inverter appropriately to minimize the transmitter delay
- // Note - In order to minimize leakage, we are not adding a set of inverters to
- // bring down delay. Instead, we are sizing the single gate
- // based on the logical effort.
- double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0)
- + gate_C(2*min_w_pmos, 0)));
- double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff;
- double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0));
- inv_size = MAX(inv_size, 1);
-
- /* nand gate delay */
- double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1));
- double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(inv_size*g_tp.min_w_nmos_, 0) +
- gate_C(inv_size*min_w_pmos, 0);
-
- double timeconst = res_eq * cap_eq;
-
- delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, RISE);
- double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd;
-
- inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */
-
- /* Inverter delay:
- * The load capacitance of this inv depends on
- * the gate capacitance of the final stage nmos
- * transistor which in turn depends on nsize
- */
- res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1);
- cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
- gate_C(nsize, 0);
- timeconst = res_eq * cap_eq;
-
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- deviceType->Vth/deviceType->Vdd, FALL);
- temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd;
-
-
- transmitter.delay = delay;
- transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/
- transmitter.power.readOp.leakage = deviceType->Vdd *
- (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- transmitter.power.readOp.gate_leakage = deviceType->Vdd *
- (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
- 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
-
- inputrise = delay / deviceType->Vth;
-
- /* nmos delay + wire delay */
- cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 +
- nsense * sense_amp_input_cap(); //+receiver cap
- /*
- * NOTE: nmos is used as both pull up and pull down transistor
- * in the transmitter. This is because for low voltage swing, drive
- * resistance of nmos is less than pmos
- * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency)
- */
- timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire +
- drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) +
- rwire*cwire/2 +
- (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) *
- nsense * sense_amp_input_cap();
-
- /*
- * since we are pre-equalizing and overdriving the low
- * swing wires, the net time constant is less
- * than the actual value
- */
- delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0);
-#define VOL_SWING .1
- temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */
- temp_power *= 2; /* differential wire */
-
- l_wire.delay = delay - transmitter.delay;
- l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic;
- l_wire.power.readOp.leakage = deviceType->Vdd*
- (4* cmos_Isub_leakage(nsize, 0, 1, nmos));
-
- l_wire.power.readOp.gate_leakage = deviceType->Vdd*
- (4* cmos_Ig_leakage(nsize, 0, 1, nmos));
-
- //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd,
- // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth;
-
- delay += g_tp.sense_delay;
-
- sense_amp.delay = g_tp.sense_delay;
- out_rise_time = g_tp.sense_delay/(deviceType->Vth);
- sense_amp.power.readOp.dynamic = g_tp.sense_dy_power;
- sense_amp.power.readOp.leakage = 0; //FIXME
- sense_amp.power.readOp.gate_leakage = 0;
-
- power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic;
- power.readOp.leakage = transmitter.power.readOp.leakage +
- l_wire.power.readOp.leakage +
- sense_amp.power.readOp.leakage;
- power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage +
- l_wire.power.readOp.gate_leakage +
- sense_amp.power.readOp.gate_leakage;
-}
-
- double
-Wire::sense_amp_input_cap()
-{
- return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
- gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
- drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def);
-}
-
-
-void Wire::delay_optimal_wire (/*bool reset_repeater_sizing*/)
-{
- double len = wire_length;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- double switching = 0; // switching energy
- double short_ckt = 0; // short-circuit energy
- double tc = 0; // time constant
- // input cap of min sized driver
- double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
-
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
-
- // wire cap /m
- double wc = wire_cap(len);
-
- // size the repeater such that the delay of the wire is minimum
- double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel
-
- // calc the optimum spacing between the repeaters (m)
-
-// if (reset_repeater_sizing==true) {
-
- repeater_spacing_init = sqrt(2 * out_res * (out_cap + input_cap)/
- ((wr/len)*(wc/len)));
- repeater_size_init = repeater_scaling;
-// }
-
- switching = (repeater_scaling * (input_cap + out_cap) +
- repeater_spacing_init * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
-
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing_init /repeater_scaling +
- wr/len * repeater_spacing_init * input_cap * repeater_scaling +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing_init * repeater_spacing_init ;
-
- delay = 0.693 * tc * len/repeater_spacing_init ;
-
-#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_scaling * tc;
-
- area.set_area((len/repeater_spacing_init ) *
- compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
- g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def));
- power.readOp.dynamic = ((len/repeater_spacing_init )*(switching + short_ckt));
- power.readOp.leakage = ((len/repeater_spacing_init )*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
- power.readOp.gate_leakage = ((len/repeater_spacing_init )*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
-}
-
-
-
-// calculate power/delay values for wires with suboptimal repeater sizing/spacing
-void
-Wire::init_wire(/*bool reset_repeater_sizing*/){
- wire_length = 1;
- delay_optimal_wire(/*reset_repeater_sizing*/);
- double sp, si;
- powerDef pow;
- si = repeater_size_init ;
- sp = repeater_spacing_init ;
- sp *= 1e6; // in microns
-
- double i, j, del;
- repeated_wire.push_back(Component());
- for (j=sp; j < 4*sp; j+=100) {
- for (i = si; i > 1; i--) {
- pow = wire_model(j*1e-6, i, &del);
- if (j == sp && i == si) {
- global.delay = del;
- global.power = pow;
- global.area.h = si;
- global.area.w = sp*1e-6; // m
- }
-// cout << "Repeater size - "<< i <<
-// " Repeater spacing - " << j <<
-// " Delay - " << del <<
-// " PowerD - " << pow.readOp.dynamic <<
-// " PowerL - " << pow.readOp.leakage <delay;
- low_swing.power = l_wire->power;
- delete l_wire;
-}
-
-
-
-void Wire::update_fullswing()
-{
-
- list::iterator citer;
- double del[4];
- del[3] = this->global.delay + this->global.delay*.3;
- del[2] = global.delay + global.delay*.2;
- del[1] = global.delay + global.delay*.1;
- del[0] = global.delay + global.delay*.05;
- double threshold;
- double ncost;
- double cost;
- int i = 4;
- while (i>0) {
- threshold = del[i-1];
- cost = BIGNUM;
- for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++)
- {
- if (citer->delay > threshold) {
- citer = repeated_wire.erase(citer);
- citer --;
- }
- else {
- ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic +
- citer->power.readOp.leakage/global.power.readOp.leakage;
- if(ncost < cost)
- {
- cost = ncost;
- if (i == 4) {
- global_30.delay = citer->delay;
- global_30.power = citer->power;
- global_30.area = citer->area;
- }
- else if (i==3) {
- global_20.delay = citer->delay;
- global_20.power = citer->power;
- global_20.area = citer->area;
- }
- else if(i==2) {
- global_10.delay = citer->delay;
- global_10.power = citer->power;
- global_10.area = citer->area;
- }
- else if(i==1) {
- global_5.delay = citer->delay;
- global_5.power = citer->power;
- global_5.area = citer->area;
- }
- }
- }
- }
- i--;
- }
- citer = repeated_wire.begin();
- while (!repeated_wire.empty()) //TODO: code optimize
- {citer=repeated_wire.erase(citer);}
-}
-
-
-
-powerDef Wire::wire_model (double space, double size, double *delay)
-{
- powerDef ptemp;
- double len = 1;
- //double min_wire_width = wire_width; //m
- double beta = pmos_to_nmos_sz_ratio();
- // switching energy
- double switching = 0;
- // short-circuit energy
- double short_ckt = 0;
- // time constant
- double tc = 0;
- // input cap of min sized driver
- double input_cap = gate_C (g_tp.min_w_nmos_ +
- min_w_pmos, 0);
-
- // output parasitic capacitance of
- // the min. sized driver
- double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
- drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def);
- // drive resistance
- double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) +
- tr_R_on(min_w_pmos, PCH, 1))/2;
- double wr = wire_res(len); //ohm
-
- // wire cap /m
- double wc = wire_cap(len);
-
- repeater_spacing = space;
- repeater_size = size;
-
- switching = (repeater_size * (input_cap + out_cap) +
- repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
-
- tc = out_res * (input_cap + out_cap) +
- out_res * wc/len * repeater_spacing/repeater_size +
- wr/len * repeater_spacing * out_cap * repeater_size +
- 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing;
-
- *delay = 0.693 * tc * len/repeater_spacing;
-
-#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */
- short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 *
- repeater_size * tc;
-
- ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt));
- ptemp.readOp.leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- ptemp.readOp.gate_leakage = ((len/repeater_spacing)*
- deviceType->Vdd*
- cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
-
- return ptemp;
-}
-
-void
-Wire::print_wire()
-{
-
- cout << "\nWire Properties at DVS level 0:\n\n";
- cout << " Delay Optimal\n\tRepeater size - "<< global.area.h <<
- " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)"
- " \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
- " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)"
- " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
- " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n";
- cout << "\tWire width - " <delay;
- low_swing.power = l_wire->power;
- delete l_wire;
-
-}
diff --git a/cacti/wire.h b/cacti/wire.h
deleted file mode 100644
index ce2ddf3..0000000
--- a/cacti/wire.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*****************************************************************************
- * McPAT/CACTI
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-
-
-#ifndef __WIRE_H__
-#define __WIRE_H__
-
-#include "basic_circuit.h"
-#include "component.h"
-#include "parameter.h"
-#include "assert.h"
-#include "cacti_interface.h"
-#include
-#include
-
-class Wire : public Component
-{
- public:
- Wire(enum Wire_type wire_model, double len /* in u*/,
- int nsense = 1/* no. of sense amps connected to the low-swing wire */,
- double width_scaling = 1,
- double spacing_scaling = 1,
- enum Wire_placement wire_placement = outside_mat,
- double resistivity = CU_RESISTIVITY,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
- ~Wire();
-
- Wire( double width_scaling = 1,
- double spacing_scaling = 1,
-// bool reset_repeater_sizing = true,
- enum Wire_placement wire_placement = outside_mat,
- double resistivity = CU_RESISTIVITY,
- TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
- ); // should be used only once for initializing static members
- void init_wire(/*bool reset_repeater_sizing = true*/);
-
- void calculate_wire_stats();
- void delay_optimal_wire(/*bool reset_repeater_sizing = true*/);
- double wire_cap(double len, bool call_from_outside=false);
- double wire_res(double len);
- void low_swing_model();
- double signal_fall_time();
- double signal_rise_time();
- double sense_amp_input_cap();
-
- enum Wire_type wt;
- double wire_spacing;
- double wire_width;
- enum Wire_placement wire_placement;
- double repeater_size;
- double repeater_spacing;
- static double repeater_size_init; // value used in initialization should not be reused in final output
- static double repeater_spacing_init;
- double wire_length;
- double in_rise_time, out_rise_time;
-
- void set_in_rise_time(double rt)
- {
- in_rise_time = rt;
- }
- static Component global;
- static Component global_5;
- static Component global_10;
- static Component global_20;
- static Component global_30;
- static Component low_swing;
- static double wire_width_init;
- static double wire_spacing_init;
- static void print_wire();
- void wire_dvs_update();
-
- private:
-
- int nsense; // no. of sense amps connected to a low-swing wire if it
- // is broadcasting data to multiple destinations
- // width and spacing scaling factor can be used
- // to model low level wires or special
- // fat wires
- double w_scale, s_scale;
- double resistivity;
- powerDef wire_model (double space, double size, double *delay);
- list repeated_wire;
- void update_fullswing();
- static int initialized;
-
-
- //low-swing
- Component transmitter;
- Component l_wire;
- Component sense_amp;
-
- double min_w_pmos;
-
- TechnologyParameter::DeviceType *deviceType;
-
-};
-
-#endif
diff --git a/core.cc b/core.cc
deleted file mode 100644
index 562fd0e..0000000
--- a/core.cc
+++ /dev/null
@@ -1,4437 +0,0 @@
-/*****************************************************************************
- * McPAT
- * SOFTWARE LICENSE AGREEMENT
- * Copyright 2012 Hewlett-Packard Development Company, L.P.
- * All Rights Reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
-
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
- *
- ***************************************************************************/
-
-#include "io.h"
-#include "parameter.h"
-#include "const.h"
-#include "basic_circuit.h"
-#include
-#include
-#include "XML_Parse.h"
-#include
-#include
-#include
-#include "core.h"
-//#include "globalvar.h"
-
-InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IB (0),
- BTB (0),
- ID_inst (0),
- ID_operand (0),
- ID_misc (0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false, is_default = true;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];
- //Assuming all L1 caches are virtually idxed physically tagged.
- //cache
-
- size = (int)XML->sys.core[ithCore].icache.icache_config[0];
- line = (int)XML->sys.core[ithCore].icache.icache_config[1];
- assoc = (int)XML->sys.core[ithCore].icache.icache_config[2];
- banks = (int)XML->sys.core[ithCore].icache.icache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- // interface_ip.obj_func_dyn_energy = 0;
- // interface_ip.obj_func_dyn_power = 0;
- // interface_ip.obj_func_leak_power = 0;
- // interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
- scktRatio = g_tp.sckt_co_eff;
- chip_PR_overhead = g_tp.chip_layout_overhead;
- macro_PR_overhead = g_tp.macro_layout_overhead;
- icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
- area.set_area(area.get_area()+ icache.caches->local_result.area);
- //output_data_csv(icache.caches.local_result);
-
-
- /*
- *iCache controllers
- *miss buffer Each MSHR contains enough state
- *to handle one or more accesses of any type to a single memory line.
- *Due to the generality of the MSHR mechanism,
- *the amount of state involved is non-trivial:
- *including the address, pointers to the cache entry and destination register,
- *written data, and various other pieces of state.
- */
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
- area.set_area(area.get_area()+ icache.missb->local_result.area);
- //output_data_csv(icache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = icache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
- area.set_area(area.get_area()+ icache.ifb->local_result.area);
- //output_data_csv(icache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
- icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
- //output_data_csv(icache.prefetchb.local_result);
-
- //Instruction buffer
- data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- interface_ip.pure_cam = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
- area.set_area(area.get_area()+ IB->local_result.area);
- //output_data_csv(IB.IB.local_result);
-
- // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
- // inst_decoder.init_decoder(is_default, &interface_ip);
- // inst_decoder.full_decoder_power();
-
- if (coredynp.predictionW>0)
- {
- /*
- * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
- * It is only a cache without all the buffers in the cache controller since it is more like a
- * look up table than a cache with cache controller. When access miss, no load from other places
- * such as main memory (not actively fill the misses), it is passively updated under two circumstances:
- * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
- * 2) When BPT@ID stage predicts differently than BTB
- * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
- * 4) when EXEU find out wrong target has been provided from BTB.
- *
- */
- size = XML->sys.core[ithCore].BTB.BTB_config[0];
- line = XML->sys.core[ithCore].BTB.BTB_config[1];
- assoc = XML->sys.core[ithCore].BTB.BTB_config[2];
- banks = XML->sys.core[ithCore].BTB.BTB_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
-// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- interface_ip.pure_cam = false;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:size;
- interface_ip.line_sz = debug?64:line;
- interface_ip.assoc = debug?8:assoc;
- interface_ip.nbanks = debug?1:banks;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
- area.set_area(area.get_area()+ BTB->local_result.area);
- ///cout<<"area="<area.get_area());
- }
-
- ID_inst = new inst_decoder(is_default, &interface_ip,
- coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_operand = new inst_decoder(is_default, &interface_ip,
- coredynp.arch_ireg_width, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
-
- ID_misc = new inst_decoder(is_default, &interface_ip,
- 8/* Prefix field etc upto 14B*/, 1,
- coredynp.x86,
- Core_device, coredynp.core_ty);
- //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer.
- //So the dynamic power should be multiplied by a few times.
- area.set_area(area.get_area()+ (ID_inst->area.get_area()
- +ID_operand->area.get_area()
- +ID_misc->area.get_area())*coredynp.decodeW);
-
-}
-
-
-BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- globalBPT(0),
- localBPT(0),
- L1_localBPT(0),
- L2_localBPT(0),
- chooser(0),
- RAS(0),
- exist(exist_)
-{
- /*
- * Branch Predictor, accessed during ID stage.
- * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
- * including global predictor, local two level predictor, and Chooser.
- * The Branch predictor also includes a RAS (return address stack) for function calls
- * Branch predictors are tagged by thread ID and modeled as 1-way associative cache.
- * However RAS return address stacks are duplicated for each thread.
- * TODO:Data Width need to be computed more precisely *
- */
- if (!exist) return;
- int tag, data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.assoc = 1;
- interface_ip.pure_cam = false;
- if (coredynp.multithreaded)
- {
-
- tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS);
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
-
- interface_ip.is_cache = true;
- interface_ip.pure_ram = false;
- }
- else
- {
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
-
- }
- //Global predictor
- data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
- area.set_area(area.get_area()+ globalBPT->local_result.area);
-
- //Local BPT (Level 1)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
- area.set_area(area.get_area()+ L1_localBPT->local_result.area);
-
- //Local BPT (Level 2)
- data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
- L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
- area.set_area(area.get_area()+ L2_localBPT->local_result.area);
-
- //Chooser
- data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
- chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
- area.set_area(area.get_area()+ chooser->local_result.area);
-
- //RAS return address stacks are Duplicated for each thread.
- interface_ip.is_cache = false;
- interface_ip.pure_ram = true;
- data = int(ceil(coredynp.pc_width/8.0));
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.predictionW;
- interface_ip.num_wr_ports = coredynp.predictionW;
- interface_ip.num_se_rd_ports = 0;
- RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
- RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
- area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
-
-}
-
-SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- int_inst_window(0),
- fp_inst_window(0),
- ROB(0),
- instruction_selection(0),
- exist(exist_)
- {
- if (!exist) return;
- int tag, data;
- bool is_default=true;
- string tmp_name;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
- {
- //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
- tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
- data = XML->sys.core[ithCore].instruction_length;
- //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures
- //Software Developer’s Manual
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- //output_data_csv(iRS.RS.local_result);
- Iw_height =int_inst_window->local_result.cache_ht;
-
- /*
- * selection logic
- * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
- * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
- * at the issue stage.
- */
- interface_ip.assoc = 1; //reset to prevent unnecessary warning messages when init_interface
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
- &interface_ip, Core_device, coredynp.core_ty);
- }
-
- if (coredynp.core_ty==OOO)
- {
- /*
- * CAM based instruction window
- * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
- * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored
- * It is written once and read twice(two operands) before an instruction can be issued.
- * X86 instruction can be very long up to 15B. add instruction length in XML
- */
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
- tmp_name = "InstIssueQueue";
- }
- else
- {
- tag = coredynp.phy_ireg_width;
- // Each time only half of the tag is compared, but two tag should be stored.
- // This underestimate the search power
- data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
- 2*coredynp.int_data_width)/2.0)/8.0));
- //Data width being divided by 2 means only after both operands available the whole data will be read out.
- //This is modeled using two equivalent readouts with half of the data width
-
- tmp_name = "IntReservationStation";
- }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 2*1.0/clockRate;
- interface_ip.latency = 2*1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.peak_issueW;
- int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
- Iw_height =int_inst_window->local_result.cache_ht;
- //FU inst window
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
- tmp_name = "FPIssueQueue";
- }
- else
- {
- tag = 2*coredynp.phy_ireg_width;
- data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
- 2*coredynp.fp_data_width)/8.0));
- tmp_name = "FPReservationStation";
- }
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_issueW;
- interface_ip.num_wr_ports = coredynp.fp_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = coredynp.fp_issueW;
- fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
- fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
- fp_Iw_height =fp_inst_window->local_result.cache_ht;
-
- if (XML->sys.core[ithCore].ROB_size >0)
- {
- /*
- * if ROB_size = 0, then the target processor does not support hardware-based
- * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
- * means branch must be resolved before instruction issued into instruction window, since
- * there is no change to flush miss-predict branch path after instructions are issued in this situation.
- *
- * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
- * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
- * However, this approach is abandoned due to its high power and poor scalability.
- * McPAT uses current implementation of ROB as circular buffer.
- * ROB is written once when instruction is issued and read once when the instruction is committed. *
- */
-
- int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
- data = int(ceil((robExtra+coredynp.pc_width + ((coredynp.rm_ty ==RAMbased)? (coredynp.phy_ireg_width + coredynp.phy_freg_width) : fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width)) + ((coredynp.scheu_ty==PhysicalRegFile)? 0 : coredynp.fp_data_width ))/8.0));
- /*
- * 5 bits are: busy, Issued, Finished, speculative, valid;
- * PC is to id the instruction for recover exception/mis-prediction.
- * When using RAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT,
- * so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated;
- * otherwise, the RAM-based RAT needs to support search ops to identify the target architecture register that needs to be updated, or the physical resigner that needs to be recycled;
- * When using CAM-based RAT, ROB only needs to contain destination physical register since the CAM-base RAT can search for the corresponding ARF-PRF mapping
- * to find the correct entry in the RAT, so that the correct architecture register (and freelist/bits) can be found and the RAT can be appropriately updated.
- * ROB phy_reg entry should use the larger one from phy_ireg and phy_freg; fdata_width is always larger.
- * Latest Intel Processors may have different ROB/RS designs.
- */
-
-
-
-/*
- if(coredynp.scheu_ty==PhysicalRegFile)
- {
- //PC is to id the instruction for recover exception.
- //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
-
- if (coredynp.rm_ty ==RAMbased)
- {
- data = int(ceil((robExtra + coredynp.pc_width + (coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0));
- //When using RAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT,
- //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated.
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- data = int(ceil((robExtra+coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width))/8.0));
- //When using CAM-based RAT, ROB needs to contain the ARF-PRF mapping to index the correct entry in the RAT,
- //so that the correct architecture register (and freelist) can be found and the RAT can be appropriately updated.
- }
- }
- else
- {
- //in RS based OOO, ROB also contains value of destination reg
-// data = int(ceil((robExtra+coredynp.pc_width +
-// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
-
- //using phy_reg number to search in the RAT, the correct architecture register can be found and the RAT can be appropriately updated.
- //ROB phy_reg entry should use the larger one from ireg and freg; fdata_width is always larger; Latest Intel Processors may have different ROB/RS designs.
- data = int(ceil((robExtra + coredynp.pc_width + fmax(coredynp.phy_ireg_width, coredynp.phy_freg_width) + coredynp.fp_data_width)/8.0));
- }
-*/
-
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.peak_commitW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = 0;
- ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
- ROB_height =ROB->local_result.cache_ht;
- }
-
- instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
- coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
- }
-}
-
-LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- LSQ(0),
- LoadQ(0),
- exist(exist_)
-{
- if (!exist) return;
- int idx, tag, data, size, line, assoc, banks;
- bool debug= false;
- int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];
-
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- //Dcache
- size = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
- line = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
- banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3];
- idx = debug?9:int(ceil(log2(size/line/assoc)));
- tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
- interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
- interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
- interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.is_cache = true;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
- area.set_area(area.get_area()+ dcache.caches->local_result.area);
- //output_data_csv(dcache.caches.local_result);
-
- //dCache controllers
- //miss buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
- area.set_area(area.get_area()+ dcache.missb->local_result.area);
- //output_data_csv(dcache.missb.local_result);
-
- //fill buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
- area.set_area(area.get_area()+ dcache.ifb->local_result.area);
- //output_data_csv(dcache.ifb.local_result);
-
- //prefetch buffer
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge.
- data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution.
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
- area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
- //output_data_csv(dcache.prefetchb.local_result);
-
- //WBB
-
- if (cache_p==Write_back)
- {
- tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
- data = dcache.caches->l_ip.line_sz;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
- area.set_area(area.get_area()+ dcache.wbb->local_result.area);
- //output_data_csv(dcache.wbb.local_result);
- }
-
- /*
- * LSU--in-order processors do not have separate load queue: unified lsq
- * partitioned among threads
- * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
- */
- tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
- data = XML->sys.machine_bits;
- interface_ip.is_cache = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
- area.set_area(area.get_area()+ LSQ->local_result.area);
- //output_data_csv(LSQ.LSQ.local_result);
- lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
-
- if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
- {
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports;
- LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
- LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
- area.set_area(area.get_area()+ LoadQ->local_result.area);
- //output_data_csv(LoadQ.LoadQ.local_result);
- lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
- }
- area.set_area(area.get_area()*cdb_overhead);
-}
-
-MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- itlb(0),
- dtlb(0),
- exist(exist_)
-{
- if (!exist) return;
- int tag, data;
- bool debug= false;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.specific_tag = 1;
- //Itlb TLBs are partioned among threads according to Nigara and Nehalem
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
- itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
- area.set_area(area.get_area()+ itlb->local_result.area);
- //output_data_csv(itlb.tlb.local_result);
-
- //dtlb
- tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS;
- data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size)));
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 0;
- interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
- interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
- dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
- dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
- area.set_area(area.get_area()+ dtlb->local_result.area);
- //output_data_csv(dtlb.tlb.local_result);
-
-}
-
-RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- IRF (0),
- FRF (0),
- RFWIN (0),
- exist(exist_)
- {
- /*
- * processors have separate architectural register files for each thread.
- * therefore, the bypass buses need to travel across all the register files.
- */
-
- if (!exist) return;
- int data;
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- //**********************************IRF***************************************
- data = coredynp.int_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW;
- interface_ip.num_wr_ports = coredynp.peak_issueW;
- interface_ip.num_se_rd_ports = 0;
- IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*coredynp.num_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1));
- area.set_area(area.get_area()+ IRF->local_result.area*coredynp.num_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1));
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(IRF.RF.local_result);
-
- //**********************************FRF***************************************
- data = coredynp.fp_data_width;
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/32.0))*4;
- interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width;
- interface_ip.num_se_rd_ports = 0;
- FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
- FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*coredynp.num_fp_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1));
- area.set_area(area.get_area()+ FRF->local_result.area*coredynp.num_fp_pipelines*cdb_overhead*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1));
- //area.set_area(area.get_area()*cdb_overhead);
- //output_data_csv(FRF.RF.local_result);
- int_regfile_height= IRF->local_result.cache_ht*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)*sqrt(cdb_overhead);
- fp_regfile_height = FRF->local_result.cache_ht*((coredynp.scheu_ty==ReservationStation)?XML->sys.core[ithCore].number_hardware_threads:1)*sqrt(cdb_overhead);
- //since a EXU is associated with each pipeline, the cdb should not have longer length.
- if (coredynp.regWindowing)
- {
- //*********************************REG_WIN************************************
- data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = int(ceil(data/8.0));
- interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 4.0/clockRate;
- interface_ip.latency = 4.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen.
- interface_ip.num_rd_ports = 0;
- interface_ip.num_wr_ports = 0;
- interface_ip.num_se_rd_ports = 0;
- RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
- RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
- //output_data_csv(RFWIN.RF.local_result);
- }
-
-
- }
-
-EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- lsq_height(lsq_height_),
- coredynp(dyn_p_),
- rfu(0),
- scheu(0),
- fp_u(0),
- exeu(0),
- mul(0),
- int_bypass(0),
- intTagBypass(0),
- int_mul_bypass(0),
- intTag_mul_Bypass(0),
- fp_bypass(0),
- fpTagBypass(0),
- exist(exist_)
-{
- bool exist_flag = true;
- if (!exist) return;
- double fu_height = 0.0;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- rfu = new RegFU(XML, ithCore, &interface_ip,coredynp);
- scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
- exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
- area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );
- fu_height = exeu->FU_height;
- if (coredynp.num_fpus >0)
- {
- fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
- area.set_area(area.get_area()+ fp_u->area.get_area());
- }
- if (coredynp.num_muls >0)
- {
- mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
- area.set_area(area.get_area()+ mul->area.get_area());
- fu_height += mul->FU_height;
- }
- /*
- * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
- * integer by pass has two paths and fp has 3 paths.
- * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
- */
- if (XML->sys.Embedded)
- {
- interface_ip.wt =Global_30;
- interface_ip.wire_is_mat_type = 0;
- interface_ip.wire_os_mat_type = 0;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- }
- else
- {
- interface_ip.wt =Global;
- interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
- interface_ip.wire_os_mat_type = 2;
- interface_ip.throughput = 10.0/clockRate; //Do not care
- interface_ip.latency = 10.0/clockRate;
- }
-
- if (coredynp.core_ty==Inorder)
- {
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {//OOO
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- /* For physical register based OOO,
- * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
- * while tag broadcast interconnects also cover across ROB
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
-
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
- else
- {
- /*
- * In RS based processor both data and tag are broadcast together,
- * covering functional units, lsq, nst windows, register files, and ROBs
- */
- int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
- if (coredynp.num_muls>0)
- {
- int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
- rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
- }
-
- if (coredynp.num_fpus>0)
- {
- fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
- rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
- false, 1.0, coredynp.opt_local, coredynp.core_ty);
- bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
- bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
- }
- }
-
-
- }
- area.set_area(area.get_area()+ bypass.area.get_area());
-}
-
-RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- coredynp(dyn_p_),
- iFRAT(0),
- fFRAT(0),
- iRRAT(0),
- fRRAT(0),
- ifreeL(0),
- ffreeL(0),
- idcl(0),
- fdcl(0),
- RAHT(0),
- exist(exist_)
- {
- /*
- * Although renaming logic maybe be used in in-order processors,
- * McPAT assumes no renaming logic is used since the performance gain is very limited and
- * the only major inorder processor with renaming logic is Itainium
- * that is a VLIW processor and different from current McPAT's model.
- * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
- * i,f prefix mean int and fp
- * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
- * FRAT will be read twice and written once per instruction;
- * RRAT will be write once per instruction when committing and reads out all when context switch
- *
- * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
- * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
- *
- * RAM-based RAT is duplicated/partitioned for each different hardware threads
- * CAM-based RAT is shared for all hardware threads
- * With SMT, RAT is partitioned and tagged. RAM-based RAT needs to have N (N-way SMT) sets of entries, with each set for a thread.
- * The RAT control logic will determine different sets to use for different threads. But it does not need extra tag bits in the entries.
- * However, CAM-based RAT need extra tag bits to distinguish the architecture register ids for different threads.
-
- *
- * checkpointing of RAT and RRAT are both for architecture state recovery with events including mis-speculation;
- * Checkpointing is easier to implement in CAM than in RAM based RAT, despite of the inferior scalabilty of the CAM-based RATs.
- * McPAT assumes at least 1 checkpoint for CAM-based RATs, and no more than 4 checkpoints (based on MIPS designs) for RAM based RATs,
- * thus CAM-based RAT does not need RRAT
- * Although no Dual-RAT is needed in RS-based OOO processors, since archi RegFile contains the committed register values,
- * a RRAT or GC (not both) will speedup the mis-speculation recovery. Thus, when RAM-RAT does not have any GC, McPAT assumes the existence of a RRAT.
- *
- * RAM-base RAT does not need to scan/search all contents during instruction commit, since the ROB for RAM-based RAT contains the ARF-PRF mapping that is used for index the RAT entry to be updated.
- *
- * Both RAM and CAM have same DCL
- *
-
- *
- */
- if (!exist) return;
- int tag, data, out_w;
-// interface_ip.wire_is_mat_type = 0;
-// interface_ip.wire_os_mat_type = 0;
-// interface_ip.wt = Global_30;
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- if (coredynp.core_ty==OOO)
- {
- //integer pipeline
- if (coredynp.scheu_ty==PhysicalRegFile)
- {
- if (coredynp.rm_ty ==RAMbased)
- { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explanation.
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));//33;
- out_w = int(ceil(coredynp.phy_ireg_width/8.0));//bytes
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT floating point
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width + coredynp.hthread_width;
- data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint )/8.0));//each checkpoint in the CAM-based RAT design needs only 1 bit, see "a power-aware hybrid ram-cam renaming mechanism for fast recovery"
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.decodeW;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT for FP
- tag = coredynp.arch_freg_width + coredynp.hthread_width;
- data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//each checkpoint in the CAM-based RAT design needs only 1 bit, see "a power-aware hybrid ram-cam renaming mechanism for fast recovery"
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
-
- //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
- //RRAT is not needed for CAM-based RAT (McPAT assumes CAM-based RAT to have at least 1 checkpoint), it is not needed for RAM-based RAT with checkpoints
- //McPAT assumes renaming unit to have RRAT when there is no checkpoints in FRAT, while MIPS R1000 has 4 GCs, according to Intel Netburst Archi, combine GC with FRAT is very costly, especially for high issue width and high clock rate.
-
- if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1))
- {
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_se_rd_ports = 0;
- iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area);
- area.set_area(area.get_area()+ iRRAT->area.get_area());
-
- //RRAT for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area);
- area.set_area(area.get_area()+ fRRAT->area.get_area());
- }
- //Freelist of renaming unit always RAM based and needed for RAM-based RATs.
- //Although it can be implemented within the CAM-based RAT,
- //Current McPAT does not have the free bits in the CAM but use the same external free list as a close approximation for CAM RAT.
- //Recycle happens at two places: 1)when DCL check there are WAW, the Phy-registers/ROB directly recycles into freelist
- // 2)When instruction commits the Phyregisters/ROB needed to be recycled.
- //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
- //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
- interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
-
- //freelist for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ffreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
- interface_ip.num_se_rd_ports = 0;
- ffreeL = new ArrayST(&interface_ip, "FP Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area);
- area.set_area(area.get_area()+ ffreeL->area.get_area());
-
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
-
- }
- else if (coredynp.scheu_ty==ReservationStation){
- if (coredynp.rm_ty ==RAMbased){
-
- data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_ireg_width/8.0));//GC does not need to be readout
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->local_result.adjust_area();
-// iFRAT->local_result.power.readOp.dynamic *= 1+0.2*0.05;//1+mis-speculation% TODO
-// iFRAT->local_result.power.writeOp.dynamic *=1+0.2*0.05;//compensate for GC
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FP
- data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*XML->sys.core[ithCore].number_hardware_threads;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//the extra one port is for GCs
- interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->local_result.adjust_area();
-// fFRAT->local_result.power.readOp.dynamic *= 1+0.2*0.05;//1+mis-speculation% TODO
-// fFRAT->local_result.power.writeOp.dynamic *=1+0.2*0.05;//compensate for GC
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- else if ((coredynp.rm_ty ==CAMbased))
- {
- //FRAT
- tag = coredynp.arch_ireg_width + coredynp.hthread_width;
- data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));
- out_w = int(ceil (coredynp.arch_ireg_width/8.0));//GC bits does not need to be sent out
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.decodeW;
- iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area);
- area.set_area(area.get_area()+ iFRAT->area.get_area());
-
- //FRAT
- tag = coredynp.arch_freg_width + coredynp.hthread_width;
- data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
- out_w = int(ceil (coredynp.arch_freg_width/8.0));
- interface_ip.is_cache = true;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = false;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
- interface_ip.assoc = 0;
- interface_ip.nbanks = 1;
- interface_ip.out_w = out_w*8;
- interface_ip.specific_tag = 1;
- interface_ip.tag_w = tag;
- interface_ip.access_mode = 2;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//for GCs
- interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
- fFRAT = new ArrayST(&interface_ip, "FP FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area);
- area.set_area(area.get_area()+ fFRAT->area.get_area());
-
- }
- //Although no RRAT for RS based OOO is really needed since the archiRF always holds the non-speculative data, having the RRAT or GC (not both) can help the recovery of mis-speculations.
-
- if ((coredynp.rm_ty ==RAMbased) && (coredynp.globalCheckpoint<1))
- {
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
- interface_ip.num_se_rd_ports = 0;
- iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area);
- area.set_area(area.get_area()+ iRRAT->area.get_area());
-
- //RRAT for FP
- data = int(ceil(coredynp.phy_freg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2*XML->sys.core[ithCore].number_hardware_threads;//HACK--2 to make it as least 64B
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 0;
- interface_ip.num_rd_ports = coredynp.fp_decodeW;
- interface_ip.num_wr_ports = coredynp.fp_decodeW;
- interface_ip.num_se_rd_ports = 0;
- fRRAT = new ArrayST(&interface_ip, "FP RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
- fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area);
- area.set_area(area.get_area()+ fRRAT->area.get_area());
- }
-
- //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
- data = int(ceil(coredynp.phy_ireg_width/8.0));
- interface_ip.is_cache = false;
- interface_ip.pure_cam = false;
- interface_ip.pure_ram = true;
- interface_ip.line_sz = data;
- interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
- interface_ip.assoc = 1;
- interface_ip.nbanks = 1;
- interface_ip.out_w = interface_ip.line_sz*8;
- interface_ip.access_mode = 1;
- interface_ip.throughput = 1.0/clockRate;
- interface_ip.latency = 1.0/clockRate;
- interface_ip.obj_func_dyn_energy = 0;
- interface_ip.obj_func_dyn_power = 0;
- interface_ip.obj_func_leak_power = 0;
- interface_ip.obj_func_cycle_t = 1;
- interface_ip.num_rw_ports = 1;//TODO
- interface_ip.num_rd_ports = coredynp.decodeW;
- interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
- interface_ip.num_se_rd_ports = 0;
- ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
- //ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
- area.set_area(area.get_area()+ ifreeL->area.get_area());
-
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
- }
-
-}
- if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
- {
- /* Dependency check logic will only present when decode(issue) width>1.
- * Multiple issue in order processor can do without renaming, but dcl is a must.
- */
- idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
- fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
- }
-}
-
-Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
-:XML(XML_interface),
- ithCore(ithCore_),
- interface_ip(*interface_ip_),
- ifu (0),
- lsu (0),
- mmu (0),
- exu (0),
- rnu (0),
- corepipe (0),
- undiffCore (0),
- l2cache (0)
-{
- /*
- * initialize, compute and optimize individual components.
- */
-
- bool exit_flag = true;
-
- double pipeline_area_per_unit;
- // interface_ip.wire_is_mat_type = 2;
- // interface_ip.wire_os_mat_type = 2;
- // interface_ip.wt =Global_30;
- set_core_param();
-
- if (XML->sys.Private_L2)
- {
- l2cache = new SharedCache(XML,ithCore, &interface_ip);
-
- }
-
- clockRate = coredynp.clockRate;
- executionTime = coredynp.executionTime;
- ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp,exit_flag);
- lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp,exit_flag);
- mmu = new MemManU (XML, ithCore, &interface_ip,coredynp,exit_flag);
- exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp,exit_flag);
- undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp,exit_flag);
- if (coredynp.core_ty==OOO)
- {
- rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp);
- }
- corepipe = new Pipeline(&interface_ip,coredynp);
-
- if (coredynp.core_ty==OOO)
- {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0;
- if (rnu->exist)
- {
- rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
- }
- }
- else {
- pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0;
- }
-
- //area.set_area(area.get_area()+ corepipe->area.get_area());
- if (ifu->exist)
- {
- ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + ifu->area.get_area());
- }
- if (lsu->exist)
- {
- lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area() + lsu->area.get_area());
- }
- if (exu->exist)
- {
- exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+exu->area.get_area());
- }
- if (mmu->exist)
- {
- mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
- area.set_area(area.get_area()+mmu->area.get_area());
- }
-
- if (coredynp.core_ty==OOO)
- {
- if (rnu->exist)
- {
-
- area.set_area(area.get_area() + rnu->area.get_area());
- }
- }
-
- if (undiffCore->exist)
- {
- area.set_area(area.get_area() + undiffCore->area.get_area());
- }
-
- if (XML->sys.Private_L2)
- {
- area.set_area(area.get_area() + l2cache->area.get_area());
-
- }
-// //clock power
-// clockNetwork.init_wire_external(is_default, &interface_ip);
-// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
-// clockNetwork.end_wiring_level =5;//toplevel metal
-// clockNetwork.start_wiring_level =5;//toplevel metal
-// clockNetwork.num_regs = corepipe.tot_stage_vector;
-// clockNetwork.optimize_wire();
-}
-
-
-void BranchPredictor::computeEnergy(bool is_tdp)
-{
- if (!exist) return;
- double r_access;
- double w_access;
- if (is_tdp)
- {
- r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
- w_access = 0*coredynp.BR_duty_cycle;
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->tdp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->tdp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->tdp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->tdp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = r_access;
- RAS->stats_t.writeAc.access = w_access;
- RAS->tdp_stats = RAS->stats_t;
- }
- else
- {
- //The resolution of BPT accesses is coarse, but this is
- //because most simulators cannot track finer grained details
- r_access = XML->sys.core[ithCore].branch_instructions;
- w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0
- globalBPT->stats_t.readAc.access = r_access;
- globalBPT->stats_t.writeAc.access = w_access;
- globalBPT->rtp_stats = globalBPT->stats_t;
-
- L1_localBPT->stats_t.readAc.access = r_access;
- L1_localBPT->stats_t.writeAc.access = w_access;
- L1_localBPT->rtp_stats = L1_localBPT->stats_t;
-
- L2_localBPT->stats_t.readAc.access = r_access;
- L2_localBPT->stats_t.writeAc.access = w_access;
- L2_localBPT->rtp_stats = L2_localBPT->stats_t;
-
- chooser->stats_t.readAc.access = r_access;
- chooser->stats_t.writeAc.access = w_access;
- chooser->rtp_stats = chooser->stats_t;
-
- RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls;
- RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
- RAS->rtp_stats = RAS->stats_t;
- }
-
- globalBPT->power_t.reset();
- L1_localBPT->power_t.reset();
- L2_localBPT->power_t.reset();
- chooser->power_t.reset();
- RAS->power_t.reset();
-
- globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
- globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
- L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
- L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
-
- L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
- L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
-
- chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
- chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
- RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
- RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;
-
- if (is_tdp)
- {
- globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
-
- power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + chooser->power + RAS->power;
- }
- else
- {
- globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
- L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
- L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
- chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg;
- RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;
- rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
- }
-}
-
-void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- bool power_gating = XML->sys.power_gating;
- if (is_tdp)
- {
- cout << indent_str<< "Global Predictor:" << endl;
- cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel? globalBPT->power.readOp.power_gated_with_long_channel_leakage : globalBPT->power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel ? L1_localBPT->power.readOp.power_gated_with_long_channel_leakage : L1_localBPT->power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel ? L2_localBPT->power.readOp.power_gated_with_long_channel_leakage : L2_localBPT->power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel? chooser->power.readOp.power_gated_with_long_channel_leakage : chooser->power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <area.get_area() *1e-6 << " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel? RAS->power.readOp.power_gated_with_long_channel_leakage : RAS->power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
- icache.caches->stats_t.readAc.miss = 0;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->tdp_stats = icache.caches->stats_t;
-
- icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.missb->tdp_stats = icache.missb->stats_t;
-
- icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.ifb->tdp_stats = icache.ifb->stats_t;
-
- icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports*coredynp.IFU_duty_cycle;
- icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
-
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
- IB->tdp_stats = IB->stats_t;
-
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
- BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
- }
-
- ID_inst->stats_t.readAc.access = coredynp.decodeW;
- ID_operand->stats_t.readAc.access = coredynp.decodeW;
- ID_misc->stats_t.readAc.access = coredynp.decodeW;
- ID_inst->tdp_stats = ID_inst->stats_t;
- ID_operand->tdp_stats = ID_operand->stats_t;
- ID_misc->tdp_stats = ID_misc->stats_t;
-
-
- }
- else
- {
- //init stats for Runtime Dynamic (RTP)
- icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses;
- icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses;
- icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
- icache.caches->rtp_stats = icache.caches->stats_t;
-
- icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.missb->rtp_stats = icache.missb->stats_t;
-
- icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.ifb->rtp_stats = icache.ifb->stats_t;
-
- icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
- icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
-
- IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
- IB->rtp_stats = IB->stats_t;
-
- if (coredynp.predictionW>0)
- {
- BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
- BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
- BTB->rtp_stats = BTB->stats_t;
- }
-
- ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
- ID_inst->rtp_stats = ID_inst->stats_t;
- ID_operand->rtp_stats = ID_operand->stats_t;
- ID_misc->rtp_stats = ID_misc->stats_t;
-
- }
-
- icache.power_t.reset();
- IB->power_t.reset();
-// ID_inst->power_t.reset();
-// ID_operand->power_t.reset();
-// ID_misc->power_t.reset();
- if (coredynp.predictionW>0)
- {
- BTB->power_t.reset();
- }
-
- icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
- //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
- icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
- icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
- icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
- icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
- icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
- icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
- icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
-
- IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
- IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
-
- if (coredynp.predictionW>0)
- {
- BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
- BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
-
- BPT->computeEnergy(is_tdp);
- }
-
- if (is_tdp)
- {
-// icache.power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
- icache.power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
- power = power + icache.power + IB->power;
- if (coredynp.predictionW>0)
- {
- BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- power = power + BTB->power + BPT->power;
- }
-
- ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic;
- ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
- ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic;
-
- ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access;
- ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
- ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access;
-
- power = power + (ID_inst->power +
- ID_operand->power +
- ID_misc->power);
- }
- else
- {
-// icache.rt_power = icache.power_t +
-// (icache.caches->local_result.power)*pppm_lkg +
-// (icache.missb->local_result.power +
-// icache.ifb->local_result.power +
-// icache.prefetchb->local_result.power)*pppm_Isub;
-
- icache.rt_power = icache.power_t +
- (icache.caches->local_result.power +
- icache.missb->local_result.power +
- icache.ifb->local_result.power +
- icache.prefetchb->local_result.power)*pppm_lkg;
-
- IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
- rt_power = rt_power + icache.rt_power + IB->rt_power;
- if (coredynp.predictionW>0)
- {
- BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
- rt_power = rt_power + BTB->rt_power + BPT->rt_power;
- }
-
- ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
- ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
- ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
-
- rt_power = rt_power + (ID_inst->rt_power +
- ID_operand->rt_power +
- ID_misc->rt_power);
- }
-}
-
-void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
-{
- if (!exist) return;
- string indent_str(indent, ' ');
- string indent_str_next(indent+2, ' ');
- bool long_channel = XML->sys.longer_channel_device;
- bool power_gating = XML->sys.power_gating;
-
- if (is_tdp)
- {
-
- cout << indent_str<< "Instruction Cache:" << endl;
- cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
- cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
- cout << indent_str_next << "Subthreshold Leakage = "
- << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
- if (power_gating) cout << indent_str_next << "Subthreshold Leakage with power gating = "
- << (long_channel? icache.power.readOp.power_gated_with_long_channel_leakage : icache.power.readOp.power_gated_leakage) << " W" << endl;
- cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
- cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
- cout <